Passed
Push — master ( 8e64a6...bca20c )
by Lars
03:44
created

UTF8::min()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 14
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 3

Importance

Changes 2
Bugs 1 Features 0
Metric Value
cc 3
eloc 7
c 2
b 1
f 0
nc 4
nop 1
dl 0
loc 14
ccs 7
cts 7
cp 1
crap 3
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
14
     * Bom => Byte-Length
15
     *
16
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
17
     *
18
     * @var array
19
     */
20
    private static $BOM = [
21
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
22
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
23
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
24
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
25
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
26
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
27
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
28
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
29
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
30
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
31
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
234
235
    /**
236
     * @var array|null
237
     */
238
    private static $CHR;
239
240
    /**
     * Intentionally empty constructor.
     *
     * It performs no initialisation; the helpers visible in this class are static
     * and do not rely on instance state.
     */
    public function __construct()
    {
    }
246
247
    /**
     * Fetch the single character found at a given position: $str[1] like functionality
     * for multi-byte strings.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the character at $pos, or an empty string for empty input or a negative position
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing can be looked up in an empty string, and negative positions are rejected.
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; any other charset is delegated to self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
268
269
    /**
     * Makes sure the string starts with the UTF-8 BOM and returns the result.
     *
     * INFO: If self::string_has_bom() already reports a BOM, the input is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        // A BOM is already present: hand the input back untouched.
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
286
287
    /**
     * Changes the case of all keys in an array.
     *
     * Every key is cast to string and converted via self::strtolower() or
     * self::strtoupper(); the values are copied unchanged. If two keys become
     * identical after the conversion, the later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default). Any other value is
     *                         treated as <strong>CASE_LOWER</strong>.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array the input values, keyed by the lower or uppercased keys
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // Only an explicit CASE_UPPER upper-cases; everything else falls back to lower-casing.
        $toUpper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the elements are only read, so a by-reference loop would
        // merely leave a dangling reference behind and force a separation of the array.
        foreach ($array as $key => $value) {
            $key = $toUpper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
     * Extracts the text located between the first $start delimiter (searched from
     * $offset) and the next $end delimiter that follows it.
     *
     * An empty string is returned when either delimiter is not found, or when
     * nothing lies between the two delimiters.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: every string operation goes through the encoding-aware helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $startPos = self::strpos($str, $start, $offset, $encoding);
            if ($startPos === false) {
                return '';
            }

            // First index right after the start delimiter.
            $from = $startPos + (int) self::strlen($start, $encoding);
            $endPos = self::strpos($str, $end, $from, $encoding);
            if ($endPos === false || $endPos === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $endPos - $from, $encoding);
        }

        // UTF-8 path: call the mbstring functions directly.
        $startPos = \mb_strpos($str, $start, $offset);
        if ($startPos === false) {
            return '';
        }

        // First index right after the start delimiter.
        $from = $startPos + (int) \mb_strlen($start);
        $endPos = \mb_strpos($str, $end, $from);
        if ($endPos === false || $endPos === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $endPos - $from);
    }
382
383
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it represents.
     *
     * The digits are converted to hexadecimal with \base_convert() and then packed
     * into bytes. NOTE: \base_convert() is documented to lose precision on large
     * numbers, so long bit strings may not convert exactly.
     *
     * @param mixed $bin 1|0
     *
     * @return string the packed bytes, or an empty string if $bin has no first
     *                element / character or converts to zero
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $hex = \base_convert($bin, 2, 16);
        if ($hex === '0') {
            return '';
        }

        // pack('H*') reads two hex digits per byte and pads a dangling digit on the
        // right ("a" => "\xa0"). Left-pad to an even length so the numeric value is
        // kept ("a" => "0a" => "\x0a").
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
403
404
    /**
     * Returns the UTF-8 Byte Order Mark, i.e. the three bytes EF BB BF.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        return "\xef\xbb\xbf";
    }
415
416
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged and returns its result.
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
430
431
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 is served by mbstring directly; other charsets go through self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
448
449
    /**
     * Splits the string into its characters by delegating to self::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
460
461
    /**
     * Detects, once, which UTF-8 related extensions / features are available and
     * stores the findings in self::$SUPPORT.
     *
     * @return true|null true when the detection ran during this call, null when it
     *                   had already been done before
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // The detection has already been performed: nothing left to do.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // Switch mbstring (incl. its regex functions) to UTF-8 and remember that.
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
516
517
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoised per code point and encoding in a function-static cache.
     * Code points up to 127 are read from the "chr" lookup table; larger ones are
     * built via IntlChar when self::$SUPPORT['intlChar'] is true, otherwise the
     * UTF-8 bytes are assembled manually from the same lookup table.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.
     *                               Numeric strings are accepted.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            // IntlChar::chr() treats a string argument as a UTF-8 *character*, not as a
            // number, so a numeric string such as "8364" would yield null. Cast numeric
            // input to int first, exactly like the vanilla fallback below does.
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr(\is_numeric($code_point) ? (int) $code_point : $code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $code_point = (int) $code_point;
        if ($code_point <= 0x7F) {
            // 1 byte: 0xxxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
629
630
    /**
     * Runs the callback on every character of the string (as produced by
     * self::str_split()) and collects the results.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
645
646
    /**
     * Generates an array holding the byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $overloaded = self::$SUPPORT['mbstring_func_overload'] === true;
        $characters = self::str_split($str);

        if (!$overloaded) {
            return \array_map('\strlen', $characters);
        }

        // With function overloading active, measure bytes through mb_strlen() in an 8-bit charset.
        $byteLength = static function (string $data): int {
            // "mb_" is available if overload is used, so use it ...
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };

        return \array_map($byteLength, $characters);
    }
676
677
    /**
     * Get a decimal code representation of a specific character.
     *
     * The lead byte determines how many bytes are read; the payload bits of the
     * lead byte and of each following byte are then combined into one integer.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $code = self::ord($char[0]);

        // 0xxxxxxx: the high bit is clear, the byte value is the result.
        if (($code & 0x80) === 0) {
            return $code;
        }

        // Work out the sequence length from the lead byte and strip its marker bits.
        $length = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $code &= ~0xf0;
        }

        // 10xxxxxx: append the low bits of every following byte.
        for ($offset = 1; $offset < $length; ++$offset) {
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
715
716
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional] <p>Prefix handed on to self::int_to_hex().</p>
     *
     * @return string The code point encoded as U+xxxx, or an empty string for empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // The entity "&#0;" is looked up as an empty string.
        $input = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($input), $pfix);
    }
736
737
    /**
     * Alias of UTF8::chr_to_decimal(): forwards the character and returns its result.
     *
     * @param string $chr
     *
     * @return int
     *
     * @see UTF8::chr_to_decimal()
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
750
751
    /**
     * Splits a string into chunks via self::str_split() and joins them with the
     * given line ending. The separator is placed between chunks only, not after
     * the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
764
765
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The byte-level regex keeps well-formed 1-4 byte sequences and drops stray
     * bytes; the optional clean-up steps then run in a fixed order: diamond
     * question mark, invisible characters, whitespace, MS Word chars, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; the other alternatives match
        // single stray bytes, which vanish because only "$1" is written back.
        $utf8Pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $str = (string) \preg_replace($utf8Pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str, '');
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
833
834
    /**
     * Clean-up a string and keep only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * The input is cast to string, passed through self::fix_simple_utf8() and then
     * through self::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($str);

        // Options for self::clean(), in parameter order.
        $removeBom = true;
        $normalizeWhitespace = true;
        $normalizeMsWord = false;
        $keepNonBreakingSpace = true;
        $replaceDiamondQuestionMark = true;
        $removeInvisibleCharacters = true;

        return self::clean(
            $fixed,
            $removeBom,
            $normalizeWhitespace,
            $normalizeMsWord,
            $keepNonBreakingSpace,
            $replaceDiamondQuestionMark,
            $removeInvisibleCharacters
        );
    }
868
869
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is first split via self::str_split(); every element is then mapped
     * through self::ord() and, for $u_style, additionally through self::int_to_hex().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $elements = \is_string($arg) ? self::str_split($arg) : $arg;

        $codePoints = \array_map([self::class, 'ord'], $elements);

        if ($codePoints === []) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
913
914
    /**
     * Replaces every run of "[[:space:]]" characters with one single space and
     * trims the result.
     *
     * Uses "mb_ereg_replace()" when mbstring support is flagged as available,
     * otherwise UTF8::regex_replace() with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the trimmed string with condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $pattern = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, $pattern, ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = \mb_ereg_replace($pattern, ' ', $str);

        // mb_ereg_replace() may not return a string, hence the cast before trimming.
        return \trim((string) $collapsed);
    }
932
933
    /**
     * Counts how often each character occurs in a string.
     *
     * The string is split into single characters by UTF8::str_split() and the
     * pieces are tallied with "array_count_values()".
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Forwarded unchanged to UTF8::str_split();
     *                                   presumably set it to false to avoid the "mb_*" functions.</p>
     *
     * @return int[] an associative array with the characters as keys and
     *               their number of occurrences as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        $chars = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($chars);
    }
957
958
    /**
     * Removes CSS "@media" blocks (media queries) from a string.
     *
     * @param string $str <p>The input string, e.g. a stylesheet.</p>
     *
     * @return string the input without the matched media-query blocks; an empty
     *                string if "preg_replace()" fails and returns null
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // Ungreedy ("U"), case-insensitive, dot-matches-newline, UTF-8 aware pattern.
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
973
974
    /**
     * Tells whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if "ctype" is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
984
985
    /**
     * Converts an int-value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * decoded by UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point; it is concatenated into the entity as-is.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';
        $flags = \ENT_QUOTES | \ENT_HTML5;

        return self::html_entity_decode($entity, $flags);
    }
996
997
    /**
     * Decodes a MIME header field.
     *
     * Uses "iconv_mime_decode()" (continuing on errors) when iconv support is
     * flagged as available, otherwise falls back to "mb_decode_mimeheader()".
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as given; anything else gets normalized first.
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // Fallback without iconv: re-encode the input for non UTF-8 charsets first.
        $input = $encoding !== 'UTF-8' ? self::encode($encoding, $str) : $str;

        return \mb_decode_mimeheader($input);
    }
1023
1024
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // Pick the key list that matches the mapping used while encoding.
        $encodedKeys = $useReversibleStringMapping === true
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace($encodedKeys, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
1052
1053
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // The emoji values are always the search list; only the replacement keys differ.
        $replacementKeys = $useReversibleStringMapping === true
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, $replacementKeys, $str);
    }
1081
1082
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so you can call it on a UTF-8 string as well without messing up the string.
     *
     * Besides real charsets, the pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES" are handled
     * for both $toEncoding and $fromEncoding.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>When true (default) the current encoding is always
     *                                       auto-detected and a detected encoding overrides $fromEncoding; if the
     *                                       detection fails, the result of UTF8::to_utf8() is returned.<br>
     *                                       When false, the detection only runs if $fromEncoding is empty.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is "JSON" and the string can not be json-encoded
     *
     * @return string the converted string, or the unchanged input if nothing had to be / could be converted
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as given; every other name gets normalized.
        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // Same (known) source and target encoding: nothing to do.
        if (
            $toEncoding
            &&
            $fromEncoding
            &&
            $fromEncoding === $toEncoding
        ) {
            return $str;
        }

        // --- pseudo encodings -------------------------------------------------

        if ($toEncoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            // NOTE(review): with $strict = true base64_decode() returns false for invalid
            // input and that value is passed on unchecked - confirm the intended handling.
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // --- detect the source encoding ---------------------------------------

        $fromEncodingDetected = false;
        if (
            $autodetectFromEncoding === true
            ||
            !$fromEncoding
        ) {
            $fromEncodingDetected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

        if ($fromEncodingDetected !== false) {
            // a detected encoding always wins over the given one
            $fromEncoding = $fromEncodingDetected;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$fromEncoding
            ||
            $fromEncoding === $toEncoding
        ) {
            return $str;
        }

        // --- conversions with dedicated helpers -------------------------------

        if (
            $toEncoding === 'UTF-8'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion -----------------------------------------------

        if (
            $toEncoding !== 'UTF-8'
            &&
            $toEncoding !== 'ISO-8859-1'
            &&
            $toEncoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $strEncoded = \mb_convert_encoding(
                $str,
                $toEncoding,
                $fromEncoding
            );

            // Compare explicitly instead of by truthiness: a converted result of "0" is a
            // valid string and must not fall through to the iconv fallback below.
            if (\is_string($strEncoded) && $strEncoded !== '') {
                return $strEncoded;
            }
        }

        $return = \iconv($fromEncoding, $toEncoding, $str);
        if ($return !== false) {
            return $return;
        }

        // Last resort: hand back the (unconverted) string.
        return $str;
    }
1240
1241
    /**
     * Encodes a string as a MIME header field value via "iconv_mime_encode()".
     *
     * The field name passed to "iconv_mime_encode()" is an empty string, so only
     * the value part carries content.
     *
     * @param string $str
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed, default: CRLF ("\r\n").</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as given; every other name gets normalized.
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transferEncoding,
                'line-length'      => $indent,
                'input-charset'    => $fromCharset,
                'output-charset'   => $toCharset,
                // The default is a real CR+LF pair; the former default was the literal
                // four characters backslash-r-backslash-n, which ended up inside folded headers.
                'line-break-chars' => $linefeed,
            ]
        );
    }
1281
1282
    /**
     * Create an extract from a sentence; if the search-string is found, the method tries to
     * center it in the output.
     *
     * Cut positions are looked up at the next space or period; skipped text is marked with
     * $replacerForSkippedText.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';
        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // For UTF-8 the native "mb_*" functions are called directly, every other
        // encoding goes through the encoding-aware wrappers of this class.
        $charCount = static function ($text) use ($isUtf8, $encoding) {
            return $isUtf8 ? (int) \mb_strlen($text) : (int) self::strlen($text, $encoding);
        };
        $slice = static function ($start, $sliceLength) use ($str, $isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($str, $start, $sliceLength)
                : self::substr($str, $start, $sliceLength, $encoding);
        };
        // Smaller one of "next space" / "next period" at or after $offset, cast to int
        // (a "not found" false from either lookup therefore ends up as 0).
        $nextBoundary = static function ($offset) use ($str, $isUtf8, $encoding) {
            if ($isUtf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // --- no search term: plain "cut at a boundary near $length" -----------
        if ($search === '') {
            $end = $length > 0 ? \min($length - 1, $charCount($str)) : 0;

            $pos = $nextBoundary($end);
            if (!$pos) {
                return $str;
            }

            $strSub = $slice(0, $pos);
            if ($strSub === false) {
                return '';
            }

            return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // --- search term given: try to center the first hit -------------------
        if ($isUtf8) {
            $wordPos = (int) \mb_stripos($str, $search);
        } else {
            $wordPos = (int) self::stripos($str, $search, 0, $encoding);
        }
        $halfSide = (int) ($wordPos - $length / 2 + $charCount($search) / 2);

        // Start of the extract: the last space / period in front of the centered window.
        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = $slice(0, $halfSide);

            if ($halfText !== false) {
                if ($isUtf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($wordPos && $halfSide > 0) {
            // The hit is far enough into the text: cut on both sides.
            $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));
            $pos_end = $nextBoundary($offset) - $pos_start;

            if ($pos_end <= 0) {
                // No usable end boundary: take everything from the start position.
                $strSub = $slice($pos_start, $charCount($str));

                return $strSub !== false
                    ? $replacerForSkippedText . \ltrim($strSub, $trimChars)
                    : '';
            }

            $strSub = $slice($pos_start, $pos_end);

            return $strSub !== false
                ? $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText
                : '';
        }

        // The hit is at (or near) the beginning: only cut at the end.
        $offset = \min($length - 1, (int) self::strlen($str, $encoding));
        $pos_end = $nextBoundary($offset);

        if (!$pos_end) {
            return $str;
        }

        $strSub = $slice(0, $pos_end);

        return $strSub !== false
            ? \rtrim($strSub, $trimChars) . $replacerForSkippedText
            : '';
    }
1469
1470
    /**
     * Reads an entire file into a string and, by default, converts text content to clean UTF-8.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. It is passed through
     *                                        "filter_var()" with FILTER_SANITIZE_STRING first.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to the native "file_get_contents()" to
     *                                        trigger the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is set,
     *                                        a context with the "http" timeout option is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts, null means 0.
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data as string or <b>false</b> on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and rewrites
        // quotes / tag-like parts of the name - confirm this sanitizing is still wanted.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // Without a caller supplied context, build one that only carries the http timeout.
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // The length argument is only passed on when the caller really provided one.
        $data = \is_int($maxLength) === true
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // Convert unless the data is binary; UTF-16 / UTF-32 content is converted in any case.
        $shouldConvert = self::is_binary($data, true) !== true
                         ||
                         self::is_utf16($data, false) !== false
                         ||
                         self::is_utf32($data, false) !== false;

        if ($shouldConvert) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1564
1565
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * The whole file is read with the native "file_get_contents()" and then
     * handed to UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1584
1585
    /**
     * Normalizes strings (also nested in arrays / objects) to UTF-8 in the given normalization form.
     *
     * Arrays and objects are walked recursively, strings are normalized, every other
     * type is returned untouched. When "Normalizer::normalize()" yields no usable result,
     * the string is converted with UTF8::encode('UTF-8', ...) instead.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>A "Normalizer" form constant, default: NFC.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which starts
     *                                   with a combining mark (\p{Mn}).</p>
     *
     * @return mixed the filtered value, of the same type as the input
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        // Containers: filter every element / property in place.
        if ($type === 'array' || $type === 'object') {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        if (self::is_ascii($var) === false) {
            if (\Normalizer::isNormalized($var, $normalization_form)) {
                // non-empty marker: "already normalized, nothing replaced"
                $normalized = '-';
            } else {
                $normalized = \Normalizer::normalize($var, $normalization_form);

                $var = isset($normalized[0])
                    ? $normalized
                    : self::encode('UTF-8', $var, true);
            }

            $startsWithCombiningMark = $var[0] >= "\x80"
                                       &&
                                       isset($normalized[0], $leading_combining[0])
                                       &&
                                       \preg_match('/^\\p{Mn}/u', $var);

            if ($startsWithCombiningMark) {
                // Prevent leading combining chars
                // for NFC-safe concatenations.
                $var = $leading_combining . $var;
            }
        }

        return $var;
    }
1651
1652
    /**
     * "filter_input()"-wrapper which passes the result through UTF8::filter()
     * (normalization to UTF-8 NFC).
     *
     * Gets a specific external variable by name and optionally filters it
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              Only forwarded when the caller actually passed a fourth argument.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only if it was really given, so the native default stays in effect.
        $var = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($var);
    }
1694
1695
    /**
1696
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1697
     *
1698
     * Gets external variables and optionally filters them
1699
     *
1700
     * @see http://php.net/manual/en/function.filter-input-array.php
1701
     *
1702
     * @param int   $type       <p>
1703
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1704
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1705
     *                          <b>INPUT_ENV</b>.
1706
     *                          </p>
1707
     * @param mixed $definition [optional] <p>
1708
     *                          An array defining the arguments. A valid key is a string
1709
     *                          containing a variable name and a valid value is either a filter type, or an array
1710
     *                          optionally specifying the filter, flags and options. If the value is an
1711
     *                          array, valid keys are filter which specifies the
1712
     *                          filter type,
1713
     *                          flags which specifies any flags that apply to the
1714
     *                          filter, and options which specifies any options that
1715
     *                          apply to the filter. See the example below for a better understanding.
1716
     *                          </p>
1717
     *                          <p>
1718
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1719
     *                          input array are filtered by this filter.
1720
     *                          </p>
1721
     * @param bool  $add_empty  [optional] <p>
1722
     *                          Add missing keys as <b>NULL</b> to the return value.
1723
     *                          </p>
1724
     *
1725
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1726
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1727
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1728
     *               is not set and <b>NULL</b> if the filter fails.
1729
     */
1730
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native function is called bare; as soon
        // as a definition was supplied, both optional arguments are forwarded.
        $rawValues = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // Hand the native result to the class' own filter() step.
        return self::filter($rawValues);
    }
1740
1741
    /**
1742
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1743
     *
1744
     * Filters a variable with a specified filter
1745
     *
1746
     * @see http://php.net/manual/en/function.filter-var.php
1747
     *
1748
     * @param mixed $variable <p>
1749
     *                        Value to filter.
1750
     *                        </p>
1751
     * @param int   $filter   [optional] <p>
1752
     *                        The ID of the filter to apply. The
1753
     *                        manual page lists the available filters.
1754
     *                        </p>
1755
     * @param mixed $options  [optional] <p>
1756
     *                        Associative array of options or bitwise disjunction of flags. If filter
1757
     *                        accepts options, flags can be provided in "flags" field of array. For
1758
     *                        the "callback" filter, callable type should be passed. The
1759
     *                        callback must accept one argument, the value to be filtered, and return
1760
     *                        the value after filtering/sanitizing it.
1761
     *                        </p>
1762
     *                        <p>
1763
     *                        <code>
1764
     *                        // for filters that accept options, use this format
1765
     *                        $options = array(
1766
     *                        'options' => array(
1767
     *                        'default' => 3, // value to return if the filter fails
1768
     *                        // other options here
1769
     *                        'min_range' => 0
1770
     *                        ),
1771
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1772
     *                        );
1773
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1774
     *                        // for filter that only accept flags, you can pass them directly
1775
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1776
     *                        // for filter that only accept flags, you can also pass as an array
1777
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1778
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1779
     *                        // callback validate filter
1780
     *                        function foo($value)
1781
     *                        {
1782
     *                        // Expected format: Surname, GivenNames
1783
     *                        if (strpos($value, ", ") === false) return false;
1784
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1785
     *                        $empty = (empty($surname) || empty($givennames));
1786
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1787
     *                        if ($empty || $notstrings) {
1788
     *                        return false;
1789
     *                        } else {
1790
     *                        return $value;
1791
     *                        }
1792
     *                        }
1793
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1794
     *                        </code>
1795
     *                        </p>
1796
     *
1797
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1798
     */
1799 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Only pass $options on to the native function when the caller
        // explicitly supplied a third argument.
        if (\func_num_args() >= 3) {
            return self::filter(\filter_var($variable, $filter, $options));
        }

        return self::filter(\filter_var($variable, $filter));
    }
1809
1810
    /**
1811
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1812
     *
1813
     * Gets multiple variables and optionally filters them
1814
     *
1815
     * @see http://php.net/manual/en/function.filter-var-array.php
1816
     *
1817
     * @param array $data       <p>
1818
     *                          An array with string keys containing the data to filter.
1819
     *                          </p>
1820
     * @param mixed $definition [optional] <p>
1821
     *                          An array defining the arguments. A valid key is a string
1822
     *                          containing a variable name and a valid value is either a
1823
     *                          filter type, or an
1824
     *                          array optionally specifying the filter, flags and options.
1825
     *                          If the value is an array, valid keys are filter
1826
     *                          which specifies the filter type,
1827
     *                          flags which specifies any flags that apply to the
1828
     *                          filter, and options which specifies any options that
1829
     *                          apply to the filter. See the example below for a better understanding.
1830
     *                          </p>
1831
     *                          <p>
1832
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1833
     *                          input array are filtered by this filter.
1834
     *                          </p>
1835
     * @param bool  $add_empty  [optional] <p>
1836
     *                          Add missing keys as <b>NULL</b> to the return value.
1837
     *                          </p>
1838
     *
1839
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1840
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1841
     *               set
1842
     */
1843 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // Without a definition the native function is called with the data
        // only; otherwise both optional arguments are forwarded.
        if (\func_num_args() >= 2) {
            return self::filter(\filter_var_array($data, $definition, $add_empty));
        }

        return self::filter(\filter_var_array($data));
    }
1853
1854
    /**
     * Tells whether the "finfo" class exists in the current PHP runtime.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1864
1865
    /**
     * Returns the leading $n characters of a string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        $nothingToTake = $n <= 0 || $str === '';
        if ($nothingToTake) {
            return '';
        }

        // "UTF-8" goes straight to mb_substr() (no explicit encoding argument
        // is passed); every other encoding is delegated to self::substr().
        $prefix = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $prefix;
    }
1886
1887
    /**
     * Checks that a string is not longer than a given number of characters.
     *
     * The length is measured with self::strlen().
     *
     * @param string $str      the string to measure
     * @param int    $box_size the maximum number of characters allowed
     *
     * @return bool true if the length is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $length = self::strlen($str);

        return $length <= $box_size;
    }
1899
1900
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * Use this function if you received an UTF-8 string that was converted from Windows-1252
     * as if it was ISO-8859-1 (ignoring Windows-1252 chars from 80 to 9F).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement table is loaded via self::getData('utf8_fix') on first use and its
     * keys / values are kept in function-static caches for all later calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        // search / replace lists, built once per process
        static $brokenSequences = null;
        static $fixedSequences = null;

        if ($str === '') {
            return '';
        }

        if ($brokenSequences === null) {
            // lazy-load the shared mapping before splitting it into two lists
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
            $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($brokenSequences, $fixedSequences, $str);
    }
1933
1934
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * Arrays are processed element by element (recursively, keys are kept).
     * A string is decoded and re-encoded repeatedly until a pass no longer changes it.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            $fixed = [];
            foreach ($str as $key => $item) {
                $fixed[$key] = self::fix_utf8($item);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $fixed;
        }

        $current = (string) $str;
        $previous = '';

        // Stop as soon as a decode/encode round-trip leaves the string unchanged.
        // An empty input never enters the loop.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($previous, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1976
1977
    /**
     * Get the text direction of a specific character.
     *
     * If "IntlChar" support is flagged in self::$SUPPORT, its direction class is consulted first;
     * when that gives no LTR/RTL answer (or is unavailable), a built-in code point table is used.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction values from the "IntlChar"-Class
            if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $c = self::chr_to_decimal($char);

        // Everything outside of the table's overall span is left-to-right.
        if ($c < 0x5be || $c > 0x10b7f) {
            return 'LTR';
        }

        // Right-to-left code points: a plain int is a single code point
        // (compared strictly), a pair is an inclusive [first, last] range.
        $rtlCodePoints = [
            0x5be,
            0x5c0,
            0x5c3,
            0x5c6,
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            0x608,
            0x60b,
            0x60d,
            0x61b,
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            0x710,
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            0x7b1,
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            0x7fa,
            [0x800, 0x815],
            0x81a,
            0x824,
            0x828,
            [0x830, 0x83e],
            [0x840, 0x858],
            0x85e,
            0x200f,
            0xfb1d,
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            0xfb3e,
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            0x10808,
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            0x1083c,
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            0x1093f,
            0x10a00,
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlCodePoints as $entry) {
            if (\is_array($entry)) {
                if ($c >= $entry[0] && $c <= $entry[1]) {
                    return 'RTL';
                }
            } elseif ($c === $entry) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2090
2091
    /**
     * Check for php-support.
     *
     * When a key is given, the transliterator list is loaded on demand (via self::getData())
     * and mirrored into the support-"array" under "intl__transliterator_list_ids" before the lookup.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2115
2116
    /**
     * Detects the file type from the first two bytes of the given data.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      the (binary) content to inspect
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type';
     *               $fallback is returned when the content is shorter than two bytes
     *               or the signature is unknown
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        $head = \substr($str, 0, 2);
        if ($head === false || \strlen($head) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $head);
        if ($bytes === false) {
            return $fallback;
        }

        // The signature is the decimal value of byte one followed by the
        // decimal value of byte two, read as one integer (0xFF 0xD8 => 255216).
        /** @noinspection OffsetOperationsInspection */
        $signature = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $knownSignatures = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $knownSignatures[$signature] ?? $fallback;
    }
2187
2188
    /**
     * Builds a random string by picking $length characters out of $possibleChars.
     *
     * An empty character pool yields an empty string.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // "UTF-8" uses the mb_*() functions directly, every other encoding is
        // normalized first and then handled by the class' own strlen() / substr().
        $useMbDirectly = $encoding === 'UTF-8';

        if ($useMbDirectly) {
            $poolSize = (int) \mb_strlen($possibleChars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $poolSize = (int) self::strlen($possibleChars, $encoding);
        }

        if ($poolSize === 0) {
            return '';
        }

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $index = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                // random_int() failed: fall back to mt_rand()
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $poolSize - 1);
            }

            $char = $useMbDirectly
                ? \mb_substr($possibleChars, $index, 1)
                : self::substr($possibleChars, $index, 1, $encoding);

            // a failed extraction does not count, the position is drawn again
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2249
2250
    /**
     * Creates a unique string.
     *
     * The seed is built from a random integer, the session id, the
     * "REMOTE_ADDR" / "SERVER_ADDR" server values (if set) and the extra entropy,
     * and is then used as prefix for uniqid().
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        $seed = \implode(
            '',
            [
                \random_int(0, \mt_getrandmax()),
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $entropyExtra,
            ]
        );

        $unique = \uniqid($seed, true);

        // the hash covers the uniqid() result followed by the seed once more
        return $md5 ? \md5($unique . $seed) : $unique;
    }
2272
2273
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @param string $str
     *
     * @return bool the result of "UTF8::string_has_bom()" for the given string
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2287
2288
    /**
     * Tells whether the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without "mbstring" the check is delegated to self::str_matches_pattern()
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2304
2305
    /**
     * Tells whether the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without "mbstring" the check is delegated to self::str_matches_pattern()
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2321
2322
    /**
     * Converts a hexadecimal-value into an UTF-8 character.
     *
     * The value is parsed with hexdec() and the number is passed on to
     * "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2333
2334
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed
     * with "U+" or "\u" (e.g. "U+00a7", "\u00a7", "00a7").
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hex digits are allowed: with a wider class (e.g. a-z) an
        // input like "zzzz" would pass and intval() would silently return 0,
        // or a partial value for "12zz". The "D" modifier stops "$" from
        // accepting a trailing newline.
        if (\preg_match('/^(?:\\\\u|U\+)?([a-fA-F0-9]{4,6})$/D', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2358
2359
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string
     *
     * @see UTF8::html_entity_decode()
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2374
2375
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * With "mbstring" available the conversion is done by mb_encode_numericentity();
     * otherwise every character is encoded via "UTF8::single_chr_html_encode()".
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // start at 0x80 to leave the ASCII range untouched
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A conversion map consists of groups of exactly four integers:
            // [start code, end code, offset, mask]. A stray fifth element is
            // ignored by PHP 7 but rejected with a ValueError by PHP 8.
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2431
2432
    /**
2433
     * UTF-8 version of html_entity_decode()
2434
     *
2435
     * The reason we are not using html_entity_decode() by itself is because
2436
     * while it is not technically correct to leave out the semicolon
2437
     * at the end of an entity most browsers will still interpret the entity
2438
     * correctly. html_entity_decode() does not convert entities without
2439
     * semicolons, so we are left with our own little solution here. Bummer.
2440
     *
2441
     * Convert all HTML entities to their applicable characters
2442
     *
2443
     * INFO: opposite to UTF8::html_encode()
2444
     *
2445
     * @see http://php.net/manual/en/function.html-entity-decode.php
2446
     *
2447
     * @param string $str      <p>
2448
     *                         The input string.
2449
     *                         </p>
2450
     * @param int    $flags    [optional] <p>
2451
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2452
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2453
     *                         <table>
2454
     *                         Available <i>flags</i> constants
2455
     *                         <tr valign="top">
2456
     *                         <td>Constant Name</td>
2457
     *                         <td>Description</td>
2458
     *                         </tr>
2459
     *                         <tr valign="top">
2460
     *                         <td><b>ENT_COMPAT</b></td>
2461
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2462
     *                         </tr>
2463
     *                         <tr valign="top">
2464
     *                         <td><b>ENT_QUOTES</b></td>
2465
     *                         <td>Will convert both double and single quotes.</td>
2466
     *                         </tr>
2467
     *                         <tr valign="top">
2468
     *                         <td><b>ENT_NOQUOTES</b></td>
2469
     *                         <td>Will leave both double and single quotes unconverted.</td>
2470
     *                         </tr>
2471
     *                         <tr valign="top">
2472
     *                         <td><b>ENT_HTML401</b></td>
2473
     *                         <td>
2474
     *                         Handle code as HTML 4.01.
2475
     *                         </td>
2476
     *                         </tr>
2477
     *                         <tr valign="top">
2478
     *                         <td><b>ENT_XML1</b></td>
2479
     *                         <td>
2480
     *                         Handle code as XML 1.
2481
     *                         </td>
2482
     *                         </tr>
2483
     *                         <tr valign="top">
2484
     *                         <td><b>ENT_XHTML</b></td>
2485
     *                         <td>
2486
     *                         Handle code as XHTML.
2487
     *                         </td>
2488
     *                         </tr>
2489
     *                         <tr valign="top">
2490
     *                         <td><b>ENT_HTML5</b></td>
2491
     *                         <td>
2492
     *                         Handle code as HTML 5.
2493
     *                         </td>
2494
     *                         </tr>
2495
     *                         </table>
2496
     *                         </p>
2497
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2498
     *
2499
     * @return string the decoded string
2500
     */
2501 46
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Nothing to decode: the string is shorter than four bytes or has no ampersand.
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        $isUtf8 = $encoding === 'UTF-8';
        $useMbstring = self::$SUPPORT['mbstring'] === true;

        // Only warn when the encoding is none of the three listed here AND mbstring is reported as missing.
        $isListedEncoding = $isUtf8 || $encoding === 'ISO-8859-1' || $encoding === 'WINDOWS-1252';
        if (!$isListedEncoding && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $convMap = [0x80, 0xfffff, 0, 0xfffff, 0];

        // Repeat until a pass changes nothing, so nested entities (e.g. "&amp;lt;") are fully decoded.
        do {
            $before = $str;

            if ($useMbstring) {
                // The UTF-8 call deliberately omits the encoding argument, exactly like the two-branch original.
                $str = $isUtf8
                    ? \mb_decode_numericentity($str, $convMap)
                    : \mb_decode_numericentity($str, $convMap, $encoding);
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                        // Quote entities stay encoded in this step.
                        return ($decoded === '"' || $decoded === "'") ? $matches[0] : $decoded;
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // Append the missing ";" to numeric (decimal / hex) entities so the native decoder accepts them.
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode($str, $flags, $encoding);
            }
        } while ($before !== $str);

        return $str;
    }
2588
2589
    /**
2590
     * Create an escaped HTML version of the string via "UTF8::htmlspecialchars()".
2591
     *
2592
     * @param string $str
2593
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2594
     *
2595
     * @return string
2596
     */
2597 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Escape both quote styles and substitute invalid byte sequences instead of failing.
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2605
2606
    /**
2607
     * Remove empty html-tag.
2608
     *
2609
     * e.g.: <tag></tag>
2610
     *
2611
     * @param string $str
2612
     *
2613
     * @return string
2614
     */
2615 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag, optional whitespace, then a closing tag (the tag names are not compared).
        $pattern = '<[^\\/>]*?>\\s*?<\\/[^>]*?>';

        $result = \preg_replace('/' . $pattern . '/u', '', $str);

        if ($result === null) {
            // With the "u" modifier PCRE refuses malformed UTF-8 and preg_replace() returns null;
            // casting that to string would silently wipe the whole input. The pattern consists of
            // ASCII literals only, so retry in byte mode.
            $result = \preg_replace('/' . $pattern . '/', '', $str);
        }

        // Never discard the caller's data: fall back to the untouched input if PCRE still fails.
        return $result === null ? $str : $result;
    }
2623
2624
    /**
2625
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2626
     *
2627
     * @see http://php.net/manual/en/function.htmlentities.php
2628
     *
2629
     * @param string $str           <p>
2630
     *                              The input string.
2631
     *                              </p>
2632
     * @param int    $flags         [optional] <p>
2633
     *                              A bitmask of one or more of the following flags, which specify how to handle
2634
     *                              quotes, invalid code unit sequences and the used document type. The default is
2635
     *                              ENT_COMPAT | ENT_HTML401.
2636
     *                              <table>
2637
     *                              Available <i>flags</i> constants
2638
     *                              <tr valign="top">
2639
     *                              <td>Constant Name</td>
2640
     *                              <td>Description</td>
2641
     *                              </tr>
2642
     *                              <tr valign="top">
2643
     *                              <td><b>ENT_COMPAT</b></td>
2644
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2645
     *                              </tr>
2646
     *                              <tr valign="top">
2647
     *                              <td><b>ENT_QUOTES</b></td>
2648
     *                              <td>Will convert both double and single quotes.</td>
2649
     *                              </tr>
2650
     *                              <tr valign="top">
2651
     *                              <td><b>ENT_NOQUOTES</b></td>
2652
     *                              <td>Will leave both double and single quotes unconverted.</td>
2653
     *                              </tr>
2654
     *                              <tr valign="top">
2655
     *                              <td><b>ENT_IGNORE</b></td>
2656
     *                              <td>
2657
     *                              Silently discard invalid code unit sequences instead of returning
2658
     *                              an empty string. Using this flag is discouraged as it
2659
     *                              may have security implications.
2660
     *                              </td>
2661
     *                              </tr>
2662
     *                              <tr valign="top">
2663
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2664
     *                              <td>
2665
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2666
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2667
     *                              string.
2668
     *                              </td>
2669
     *                              </tr>
2670
     *                              <tr valign="top">
2671
     *                              <td><b>ENT_DISALLOWED</b></td>
2672
     *                              <td>
2673
     *                              Replace invalid code points for the given document type with a
2674
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2675
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2676
     *                              instance, to ensure the well-formedness of XML documents with
2677
     *                              embedded external content.
2678
     *                              </td>
2679
     *                              </tr>
2680
     *                              <tr valign="top">
2681
     *                              <td><b>ENT_HTML401</b></td>
2682
     *                              <td>
2683
     *                              Handle code as HTML 4.01.
2684
     *                              </td>
2685
     *                              </tr>
2686
     *                              <tr valign="top">
2687
     *                              <td><b>ENT_XML1</b></td>
2688
     *                              <td>
2689
     *                              Handle code as XML 1.
2690
     *                              </td>
2691
     *                              </tr>
2692
     *                              <tr valign="top">
2693
     *                              <td><b>ENT_XHTML</b></td>
2694
     *                              <td>
2695
     *                              Handle code as XHTML.
2696
     *                              </td>
2697
     *                              </tr>
2698
     *                              <tr valign="top">
2699
     *                              <td><b>ENT_HTML5</b></td>
2700
     *                              <td>
2701
     *                              Handle code as HTML 5.
2702
     *                              </td>
2703
     *                              </tr>
2704
     *                              </table>
2705
     *                              </p>
2706
     * @param string $encoding      [optional] <p>
2707
     *                              Like <b>htmlspecialchars</b>,
2708
     *                              <b>htmlentities</b> takes an optional third argument
2709
     *                              <i>encoding</i> which defines encoding used in
2710
     *                              conversion.
2711
     *                              Although this argument is technically optional, you are highly
2712
     *                              encouraged to specify the correct value for your code.
2713
     *                              </p>
2714
     * @param bool   $double_encode [optional] <p>
2715
     *                              When <i>double_encode</i> is turned off PHP will not
2716
     *                              encode existing html entities. The default is to convert everything.
2717
     *                              </p>
2718
     *
2719
     * @return string
2720
     *                <p>
2721
     *                The encoded string.
2722
     *                <br><br>
2723
     *                If the input <i>string</i> contains an invalid code unit
2724
     *                sequence within the given <i>encoding</i> an empty string
2725
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2726
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2727
     *                </p>
2728
     */
2729 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /*
         * Native htmlentities() leaves backslashes untouched, so they are
         * replaced by their numeric entity ("&#92;") here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // Final pass through html_encode() with $keepAsciiChars = true.
        return self::html_encode($encoded, true, $encoding);
    }
2753
2754
    /**
2755
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2756
     *
2757
     * INFO: Take a look at "UTF8::htmlentities()"
2758
     *
2759
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2760
     *
2761
     * @param string $str           <p>
2762
     *                              The string being converted.
2763
     *                              </p>
2764
     * @param int    $flags         [optional] <p>
2765
     *                              A bitmask of one or more of the following flags, which specify how to handle
2766
     *                              quotes, invalid code unit sequences and the used document type. The default is
2767
     *                              ENT_COMPAT | ENT_HTML401.
2768
     *                              <table>
2769
     *                              Available <i>flags</i> constants
2770
     *                              <tr valign="top">
2771
     *                              <td>Constant Name</td>
2772
     *                              <td>Description</td>
2773
     *                              </tr>
2774
     *                              <tr valign="top">
2775
     *                              <td><b>ENT_COMPAT</b></td>
2776
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2777
     *                              </tr>
2778
     *                              <tr valign="top">
2779
     *                              <td><b>ENT_QUOTES</b></td>
2780
     *                              <td>Will convert both double and single quotes.</td>
2781
     *                              </tr>
2782
     *                              <tr valign="top">
2783
     *                              <td><b>ENT_NOQUOTES</b></td>
2784
     *                              <td>Will leave both double and single quotes unconverted.</td>
2785
     *                              </tr>
2786
     *                              <tr valign="top">
2787
     *                              <td><b>ENT_IGNORE</b></td>
2788
     *                              <td>
2789
     *                              Silently discard invalid code unit sequences instead of returning
2790
     *                              an empty string. Using this flag is discouraged as it
2791
     *                              may have security implications.
2792
     *                              </td>
2793
     *                              </tr>
2794
     *                              <tr valign="top">
2795
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2796
     *                              <td>
2797
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2798
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2799
     *                              string.
2800
     *                              </td>
2801
     *                              </tr>
2802
     *                              <tr valign="top">
2803
     *                              <td><b>ENT_DISALLOWED</b></td>
2804
     *                              <td>
2805
     *                              Replace invalid code points for the given document type with a
2806
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2807
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2808
     *                              instance, to ensure the well-formedness of XML documents with
2809
     *                              embedded external content.
2810
     *                              </td>
2811
     *                              </tr>
2812
     *                              <tr valign="top">
2813
     *                              <td><b>ENT_HTML401</b></td>
2814
     *                              <td>
2815
     *                              Handle code as HTML 4.01.
2816
     *                              </td>
2817
     *                              </tr>
2818
     *                              <tr valign="top">
2819
     *                              <td><b>ENT_XML1</b></td>
2820
     *                              <td>
2821
     *                              Handle code as XML 1.
2822
     *                              </td>
2823
     *                              </tr>
2824
     *                              <tr valign="top">
2825
     *                              <td><b>ENT_XHTML</b></td>
2826
     *                              <td>
2827
     *                              Handle code as XHTML.
2828
     *                              </td>
2829
     *                              </tr>
2830
     *                              <tr valign="top">
2831
     *                              <td><b>ENT_HTML5</b></td>
2832
     *                              <td>
2833
     *                              Handle code as HTML 5.
2834
     *                              </td>
2835
     *                              </tr>
2836
     *                              </table>
2837
     *                              </p>
2838
     * @param string $encoding      [optional] <p>
2839
     *                              Defines encoding used in conversion.
2840
     *                              </p>
2841
     *                              <p>
2842
     *                              For the purposes of this function, the encodings
2843
     *                              ISO-8859-1, ISO-8859-15,
2844
     *                              UTF-8, cp866,
2845
     *                              cp1251, cp1252, and
2846
     *                              KOI8-R are effectively equivalent, provided the
2847
     *                              <i>string</i> itself is valid for the encoding, as
2848
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2849
     *                              the same positions in all of these encodings.
2850
     *                              </p>
2851
     * @param bool   $double_encode [optional] <p>
2852
     *                              When <i>double_encode</i> is turned off PHP will not
2853
     *                              encode existing html entities, the default is to convert everything.
2854
     *                              </p>
2855
     *
2856
     * @return string the converted string.
2857
     *                </p>
2858
     *                <p>
2859
     *                If the input <i>string</i> contains an invalid code unit
2860
     *                sequence within the given <i>encoding</i> an empty string
2861
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2862
     *                <b>ENT_SUBSTITUTE</b> flags are set
2863
     */
2864 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as-is; any other name is normalized first.
        $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2876
2877
    /**
2878
     * Checks whether iconv is available on the server.
2879
     *
2880
     * @return bool
2881
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2882
     */
2883
    public static function iconv_loaded(): bool
    {
        // Asks the engine whether the "iconv" extension is loaded.
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2887
2888
    /**
2889
     * alias for "UTF8::decimal_to_chr()"
2890
     *
2891
     * @param mixed $int
2892
     *
2893
     * @return string
2894
     *
2895
     * @see UTF8::decimal_to_chr()
2896
     */
2897 4
    public static function int_to_chr($int): string
    {
        // Pure alias: the conversion itself is done by decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2901
2902
    /**
2903
     * Converts Integer to hexadecimal U+xxxx code point representation.
2904
     *
2905
     * INFO: opposite to UTF8::hex_to_int()
2906
     *
2907
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2908
     * @param string $pfix [optional]
2909
     *
2910
     * @return string the code point, or empty string on failure
2911
     */
2912 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Lower-case hex digits, left-padded with zeros to at least four characters.
        $paddedHex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $paddedHex;
    }
2920
2921
    /**
2922
     * Checks whether intl-char is available on the server.
2923
     *
2924
     * @return bool
2925
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2926
     */
2927
    public static function intlChar_loaded(): bool
    {
        // "::class" only builds the name string "IntlChar"; class_exists() performs the real lookup.
        $isAvailable = \class_exists(\IntlChar::class);

        return $isAvailable;
    }
2931
2932
    /**
2933
     * Checks whether intl is available on the server.
2934
     *
2935
     * @return bool
2936
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2937
     */
2938 5
    public static function intl_loaded(): bool
    {
        // Asks the engine whether the "intl" extension is loaded.
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2942
2943
    /**
2944
     * alias for "UTF8::is_ascii()"
2945
     *
2946
     * @param string $str
2947
     *
2948
     * @return bool
2949
     *
2950
     * @see UTF8::is_ascii()
2951
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2952
     */
2953 2
    public static function isAscii(string $str): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_ascii().
        $result = self::is_ascii($str);

        return $result;
    }
2957
2958
    /**
2959
     * alias for "UTF8::is_base64()"
2960
     *
2961
     * @param string $str
2962
     *
2963
     * @return bool
2964
     *
2965
     * @see UTF8::is_base64()
2966
     * @deprecated <p>use "UTF8::is_base64()"</p>
2967
     */
2968 2
    public static function isBase64($str): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_base64().
        $result = self::is_base64($str);

        return $result;
    }
2972
2973
    /**
2974
     * alias for "UTF8::is_binary()"
2975
     *
2976
     * @param mixed $str
2977
     * @param bool  $strict
2978
     *
2979
     * @return bool
2980
     *
2981
     * @see UTF8::is_binary()
2982
     * @deprecated <p>use "UTF8::is_binary()"</p>
2983
     */
2984 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated camelCase alias; both arguments are forwarded unchanged to is_binary().
        $result = self::is_binary($str, $strict);

        return $result;
    }
2988
2989
    /**
2990
     * alias for "UTF8::is_bom()"
2991
     *
2992
     * @param string $utf8_chr
2993
     *
2994
     * @return bool
2995
     *
2996
     * @see UTF8::is_bom()
2997
     * @deprecated <p>use "UTF8::is_bom()"</p>
2998
     */
2999 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_bom().
        $result = self::is_bom($utf8_chr);

        return $result;
    }
3003
3004
    /**
3005
     * alias for "UTF8::is_html()"
3006
     *
3007
     * @param string $str
3008
     *
3009
     * @return bool
3010
     *
3011
     * @see UTF8::is_html()
3012
     * @deprecated <p>use "UTF8::is_html()"</p>
3013
     */
3014 2
    public static function isHtml(string $str): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_html().
        $result = self::is_html($str);

        return $result;
    }
3018
3019
    /**
3020
     * alias for "UTF8::is_json()"
3021
     *
3022
     * @param string $str
3023
     *
3024
     * @return bool
3025
     *
3026
     * @see UTF8::is_json()
3027
     * @deprecated <p>use "UTF8::is_json()"</p>
3028
     */
3029
    public static function isJson(string $str): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_json().
        $result = self::is_json($str);

        return $result;
    }
3033
3034
    /**
3035
     * alias for "UTF8::is_utf16()"
3036
     *
3037
     * @param mixed $str
3038
     *
3039
     * @return false|int
3040
     *                   <strong>false</strong> if it's not UTF-16,<br>
3041
     *                   <strong>1</strong> for UTF-16LE,<br>
3042
     *                   <strong>2</strong> for UTF-16BE
3043
     *
3044
     * @see UTF8::is_utf16()
3045
     * @deprecated <p>use "UTF8::is_utf16()"</p>
3046
     */
3047 2
    public static function isUtf16($str)
    {
        // Deprecated camelCase alias; returns whatever is_utf16() returns (false or an int).
        $result = self::is_utf16($str);

        return $result;
    }
3051
3052
    /**
3053
     * alias for "UTF8::is_utf32()"
3054
     *
3055
     * @param mixed $str
3056
     *
3057
     * @return false|int
3058
     *                   <strong>false</strong> if it's not UTF-32,
3059
     *                   <strong>1</strong> for UTF-32LE,
3060
     *                   <strong>2</strong> for UTF-32BE
3061
     *
3062
     * @see UTF8::is_utf32()
3063
     * @deprecated <p>use "UTF8::is_utf32()"</p>
3064
     */
3065 2
    public static function isUtf32($str)
    {
        // Deprecated camelCase alias; returns whatever is_utf32() returns (false or an int).
        $result = self::is_utf32($str);

        return $result;
    }
3069
3070
    /**
3071
     * alias for "UTF8::is_utf8()"
3072
     *
3073
     * @param string $str
3074
     * @param bool   $strict
3075
     *
3076
     * @return bool
3077
     *
3078
     * @see UTF8::is_utf8()
3079
     * @deprecated <p>use "UTF8::is_utf8()"</p>
3080
     */
3081 17
    public static function isUtf8($str, $strict = false): bool
    {
        // Deprecated camelCase alias; both arguments are forwarded unchanged to is_utf8().
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3085
3086
    /**
3087
     * Returns true if the string contains only alphabetic chars, false otherwise.
3088
     *
3089
     * @param string $str
3090
     *
3091
     * @return bool
3092
     *              Whether or not $str contains only alphabetic chars
3093
     */
3094 10
    public static function is_alpha(string $str): bool
    {
        // The same POSIX-class pattern is used for both code paths.
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // No mbstring: delegate to the class's own pattern matcher.
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3103
3104
    /**
3105
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3106
     *
3107
     * @param string $str
3108
     *
3109
     * @return bool
3110
     *              Whether or not $str contains only alphanumeric chars
3111
     */
3112 13
    public static function is_alphanumeric(string $str): bool
    {
        // The same POSIX-class pattern is used for both code paths.
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // No mbstring: delegate to the class's own pattern matcher.
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3121
3122
    /**
3123
     * Checks if a string is 7 bit ASCII.
3124
     *
3125
     * @param string $str <p>The string to check.</p>
3126
     *
3127
     * @return bool
3128
     *              <strong>true</strong> if it is ASCII<br>
3129
     *              <strong>false</strong> otherwise
3130
     */
3131 137
    public static function is_ascii(string $str): bool
    {
        // An empty string is ASCII by definition. Otherwise the string qualifies when it contains
        // no byte outside the allowed set: printable 0x20-0x7E plus 0x09, 0x0A, 0x0D, 0x10 and 0x13.
        // NOTE(review): \x10 and \x13 are DLE / DC3; possibly decimal 10 / 13 (LF / CR) were
        // intended, which are already covered by \x0A / \x0D. Confirm before changing.
        return $str === '' || !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
3139
3140
    /**
3141
     * Returns true if the string is base64 encoded, false otherwise.
3142
     *
3143
     * @param mixed|string $str                <p>The input string.</p>
3144
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
3145
     *
3146
     * @return bool whether or not $str is base64 encoded
3147
     */
3148 16
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        // The empty string only counts when the caller explicitly allows it.
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round-trip check: re-encoding must reproduce the input byte for byte.
        return \base64_encode($decoded) === $str;
    }
3165
3166
    /**
3167
     * Check if the input is binary... (this looks like a hack).
3168
     *
3169
     * @param mixed $input
3170
     * @param bool  $strict
3171
     *
3172
     * @return bool
3173
     */
3174 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // A string made only of "0" and "1" characters is treated as binary.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        // File-type detection by get_file_type() reports the type as "binary".
        $fileInfo = self::get_file_type($data);
        if ($fileInfo['type'] === 'binary') {
            return true;
        }

        // More than 25% NUL bytes counts as binary.
        $byteCount = \strlen($data);
        $nullCount = \substr_count($data, "\x0", 0, $byteCount);
        if (($nullCount / $byteCount) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        // Strict mode additionally asks ext-fileinfo for the MIME encoding of the buffer.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $mimeEncoding === 'binary';
    }
3210
3211
    /**
     * Check if the file is binary.
     *
     * Reads up to the first 512 bytes of the file and passes them to
     * UTF8::is_binary() in strict mode. A file that cannot be opened, or
     * whose first read yields an empty string, is reported as not binary.
     *
     * @param string $file <p>Path of the file to inspect.</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3235
3236
    /**
     * Check whether the string is made up of whitespace characters only.
     *
     * The string is matched against the pattern "^[[:space:]]*$", so an empty
     * string also counts as blank.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:space:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:space:]]*$', $str);
    }
3253
3254
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is exactly one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // The BOM byte sequences are the keys of self::$BOM; compare strictly,
        // without iterating the static array by reference.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3275
3276
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it converts to boolean FALSE
     * (e.g. "", "0", 0, null, an empty array).
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // for an already defined variable, empty($x) is the same as !$x
        return !$str;
    }
3290
3291
    /**
     * Check whether the string is made up of hexadecimal characters only.
     *
     * The string is matched against the pattern "^[[:xdigit:]]*$", so an
     * empty string also passes.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:xdigit:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:xdigit:]]*$', $str);
    }
3308
3309
    /**
     * Check if the string contains anything that looks like an html-tag, e.g. "<lall>".
     *
     * The string is first passed through UTF8::emoji_encode() and then
     * searched for the first opening, closing or self-closing tag.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $encoded = self::emoji_encode($str); // hack for emoji support :/

        $foundTag = [];
        \preg_match(
            "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u",
            $encoded,
            $foundTag
        );

        return \count($foundTag) !== 0;
    }
3331
3332
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via UTF8::json_decode(). A NULL result only counts
     * as valid when the input itself is the literal "null" (case-insensitive).
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        $isLiteralNull = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$isLiteralNull) {
            return false;
        }

        if ($onlyArrayOrObjectResultsAreValid === true) {
            $isContainer = \is_object($decoded) || \is_array($decoded);
            if (!$isContainer) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3368
3369
    /**
     * Check whether the string is made up of lower case characters only.
     *
     * The string is matched against the pattern "^[[:lower:]]*$", so an
     * empty string also passes.
     *
     * @param string $str
     *
     * @return bool
     */
    public static function is_lowercase(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:lower:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:lower:]]*$', $str);
    }
3383
3384
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by actually unserializing the string. Because the input
     * may be untrusted, object instantiation is disabled ("allowed_classes" =>
     * false): serialized objects are restored as "__PHP_Incomplete_Class", so
     * no foreign constructors / magic methods are executed by this test.
     *
     * @param string $str
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" unserializes to false, which is also the failure value of
        // unserialize(), so it has to be recognized explicitly.
        if ($str === 'b:0;') {
            return true;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3403
3404
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * The string is matched against the pattern "^[[:upper:]]*$", so an
     * empty string also passes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:upper:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:upper:]]*$', $str);
    }
3422
3423
    /**
     * Check if the string is UTF-16.
     *
     * For each byte order (little endian first, then big endian) the input is
     * converted to UTF-8; if converting back and forth once more is lossless,
     * the characters of the result are scored against the character statistic
     * of the original input. The byte order with the higher score wins; equal
     * scores mean "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if UTF8::is_binary() (strict) does not
     *                                     consider the input binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, evaluated in this order: LE, BE
        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (\array_keys($scores) as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only a lossless UTF-8 -> UTF-16 -> UTF-8 round trip is scored
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Iterate the keys by value: the result of a function call cannot
            // be a reference target, and nothing is modified here anyway.
            // NOTE(review): the keys of count_chars($test3) are looked up in the
            // *values* of $strChars - confirm against count_chars() that this is intended.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        $maybeUTF16LE = $scores['UTF-16LE'];
        $maybeUTF16BE = $scores['UTF-16BE'];

        if ($maybeUTF16BE === $maybeUTF16LE) {
            return false;
        }

        return $maybeUTF16LE > $maybeUTF16BE ? 1 : 2;
    }
3500
3501
    /**
     * Check if the string is UTF-32.
     *
     * For each byte order (little endian first, then big endian) the input is
     * converted to UTF-8; if converting back and forth once more is lossless,
     * the characters of the result are scored against the character statistic
     * of the original input. The byte order with the higher score wins; equal
     * scores mean "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if UTF8::is_binary() (strict) does not
     *                                     consider the input binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, evaluated in this order: LE, BE
        $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (\array_keys($scores) as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only a lossless UTF-8 -> UTF-32 -> UTF-8 round trip is scored
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Iterate the keys by value: the result of a function call cannot
            // be a reference target, and nothing is modified here anyway.
            // NOTE(review): the keys of count_chars($test3) are looked up in the
            // *values* of $strChars - confirm against count_chars() that this is intended.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        $maybeUTF32LE = $scores['UTF-32LE'];
        $maybeUTF32BE = $scores['UTF-32BE'];

        if ($maybeUTF32BE === $maybeUTF32LE) {
            return false;
        }

        return $maybeUTF32LE > $maybeUTF32BE ? 1 : 2;
    }
3578
3579
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively); the check stops at
     * the first element that fails. Any other input is cast to string and
     * handed to UTF8::is_utf8_string().
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // iterate by value: the elements are only read, never modified
            foreach ($str as $value) {
                if (self::is_utf8($value, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
3601
3602
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string
     *
     * The input is passed through UTF8::filter() before it is handed to the
     * native json_decode().
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options. Currently only
     *                        <b>JSON_BIGINT_AS_STRING</b>
     *                        is supported (default is to cast large integers as floats)
     *                        </p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filteredJson, $assoc, $depth, $options);

        return $decoded;
    }
3652
3653
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value.
     *
     * The value is passed through UTF8::filter() before it is handed to the
     * native json_encode().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filteredValue, $options, $depth);

        return $encoded;
    }
3702
3703
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is detected via the existence of the "json_decode" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3713
3714
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without language / keep-length options the first character
     * is lowered via mb_strtolower(); in every other case UTF8::strtolower()
     * is used. The rest of the string is returned untouched.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // non UTF-8 input: everything goes through the encoding-aware helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        // the plain mb-function is only used when no special handling was requested
        if ($lang === null && $tryToKeepStringLength === false) {
            return \mb_strtolower($head) . $tail;
        }

        return self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
    }
3770
3771
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $lowered;
    }
3793
3794
    /**
     * Lowercase the first character of all words in the string.
     *
     * The string is split via UTF8::str_to_words(), every word that is not
     * listed in $exceptions is passed to UTF8::lcfirst(), and the parts are
     * glued together again.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a truthiness test would also swallow the string "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // write back via the key instead of iterating by reference
        foreach ($words as $index => $word) {
            // empty parts (and "0", which has no lowercase form) need no processing
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
3840
3841
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $lowered;
    }
3863
3864
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; if null or an empty string is given,
     *                           whitespace is stripped instead.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Check explicitly instead of by truthiness, otherwise the char list "0"
        // would be ignored and whitespace would be stripped instead.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3892
3893
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array of strings is concatenated first and then treated like one string.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $subject = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($subject, false);
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
3915
3916
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 if UTF8::chr_size_list() yields no entries
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
3933
3934
    /**
     * Checks whether mbstring is available on the server.
     *
     * Availability is detected via extension_loaded('mbstring').
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
3944
3945
    /**
     * Finds the UTF-8 character with the lowest code point in the given data.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string, or an array of such strings
     *                   (array elements are concatenated before they are inspected).</strong>
     *
     * @return string|null the character with the lowest code point, or null if no code points were found
     */
    public static function min($arg)
    {
        // arrays are flattened into one string so that a single scan is enough
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepointList = self::codepoints($input, false);

        return \count($codepointList) === 0
            ? null
            : self::chr(\min($codepointList));
    }
3967
3968
    /**
     * Deprecated camelCase alias that forwards both arguments to "UTF8::normalize_encoding()".
     *
     * @param mixed $encoding <p>The encoding name to normalize.</p>
     * @param mixed $fallback <p>The value that is handed through as fallback.</p>
     *
     * @return mixed whatever "UTF8::normalize_encoding()" returns
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
3983
3984
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: falsy input => $fallback; a few hard-coded fast paths;
     * the per-request cache; an exact match in the known encodings list;
     * finally the upper-cased name, stripped of everything except [a-zA-Z0-9],
     * is looked up in the alias table below. Unknown names are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // "" and "0" are both falsy and end up here
        if (!$encoding) {
            return $fallback;
        }

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $upperCased = \strtoupper($encoding);
        // alias lookup key: only ASCII letters and digits survive
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upperCased);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown aliases keep the upper-cased input
        $normalized = $equivalences[$aliasKey] ?? $upperCased;

        // the cache is keyed by the name exactly as the caller wrote it
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4127
4128
    /**
     * Converts Windows ("\r\n") and old Mac ("\r") line endings to unix-like ("\n") ones.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with "\n" as the only line ending
     */
    public static function normalize_line_ending(string $str): string
    {
        // order matters: "\r\n" has to be handled before the lone "\r"
        $foreignLineEndings = ["\r\n", "\r"];

        return \str_replace($foreignLineEndings, "\n", $str);
    }
4139
4140
    /**
     * Replaces typographic quotes, dashes and the ellipsis (as produced by e.g. MS Word)
     * with their plain ASCII counterparts.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string the string with ASCII quotes, "-" and "..."
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $map = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \str_replace(\array_keys($map), \array_values($map), $str);
    }
4191
4192
    /**
     * Replaces every whitespace character of the internal whitespace table with a plain space
     * and (optionally) strips the bidirectional control characters.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string the normalized string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        // one cached search list per value of $keepNonBreakingSpace (0 or 1)
        $slot = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$slot])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$slot] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // the bidi controls are removed, not replaced by a space
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$slot], ' ', $str);
    }
4233
4234
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-request cache, the pre-computed "ord" data table,
     * "IntlChar::ord()" (if supported) and finally a manual decode of the first bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the key is built from the input as given, before any re-encoding below
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            // a falsy result (null or 0) falls through to the manual decoder
            if ($intlCode) {
                return $CHAR_CACHE[$cacheKey] = $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of (at most) the first four byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // 4-byte sequence
            $codepoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // 3-byte sequence
            $codepoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // 2-byte sequence
            $codepoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte (or nothing at all => 0)
            $codepoint = $lead;
        }

        return $CHAR_CACHE[$cacheKey] = (int) $codepoint;
    }
4319
4320
    /**
     * Parses the string into an array (into the the second parameter).
     *
     * WARNING: Unlike the native "parse_str()", this method never creates variables in the current scope;
     *          the parsed data is only available via the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            // native parser: it has no usable return value, only the filled array counts
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4352
4353
    /**
     * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is usable.
     *
     * The check runs an empty pattern with the "u" modifier against an empty subject.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // errors are silenced on purpose: a failing match is the "not supported" answer
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchResult = @\preg_match('//u', '');

        return (bool) $matchResult;
    }
4365
4366
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Both boundaries are interpreted the same way: as decimal code points (ctype: digits only),
     * as hexadecimal code points (ctype: hex digits only), as numeric values (without ctype),
     * or otherwise as a character whose code point is taken via "UTF8::ord()".
     * A falsy boundary (e.g. 0, "0", "") or a boundary that resolves to code point 0 yields an empty array.
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is not available
     *
     * @return string[]
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The ctype functions must always see strings: an integer argument would be
        // interpreted as an ASCII code (deprecated since PHP 8.1) and classified
        // inconsistently between the digit and the hex-digit check.
        $var1Str = (string) $var1;
        $var2Str = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;

        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit($var1Str) && \ctype_digit($var2Str)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit($var1Str) && \ctype_xdigit($var2Str)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1Str);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        // the end boundary follows the interpretation that was chosen for the start
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2Str);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4448
4449
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * Strings that contain none of "&", "%", "+" or "\u" skip the decoding and
     * are only passed through "UTF8::fix_simple_utf8()".
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string the decoded string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $mayBeEncoded = \strpos($str, '&') !== false
                        ||
                        \strpos($str, '%') !== false
                        ||
                        \strpos($str, '+') !== false
                        ||
                        \strpos($str, '\u') !== false;

        if (!$mayBeEncoded) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // repeat until a pass changes nothing (or only once without $multi_decode)
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entitiesDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($entitiesDecoded));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4506
4507
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped into the delimiter and always gets the "u" modifier,
     * followed by the given options. The option string "msr" is reduced to "ms".
     * If the underlying "preg_replace()" fails, an empty string is returned.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string the string after the replacement
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for a falsy delimiter ("" or "0")
        $delim = $delimiter ?: '/';

        $regex = $delim . $pattern . $delim . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4540
4541
    /**
     * Deprecated camelCase alias that forwards to "UTF8::remove_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string whatever "UTF8::remove_bom()" returns
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4555
4556
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the internal BOM table is tried in order; each one that
     * matches the (remaining) beginning of the string is cut off.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = \strlen($str);

        foreach (self::$BOM as $bom => $bomBytes) {
            if (\strpos($str, $bom, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $bomBytes, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= (int) $bomBytes;
            $str = (string) $stripped;
        }

        return $str;
    }
4585
4586
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "   ") is collapsed into a single occurrence.
     * Empty needles are ignored. If the regex engine fails for a needle (e.g. because the
     * input is not valid UTF-8), the string is left untouched for that needle.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        if (\is_array($what) !== true) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($what as $item) {
            // an empty needle can never have duplicates
            if ($item === '') {
                continue;
            }

            // A callback is used so that the needle is inserted literally: as a plain
            // replacement string, needles like "$0" or "\1" would act as back-references.
            $collapsed = \preg_replace_callback(
                '/(' . \preg_quote($item, '/') . ')+/u',
                static function () use ($item): string {
                    return $item;
                },
                $str
            );

            // null signals a PCRE error; keep the current string instead of wiping it
            if ($collapsed !== null) {
                $str = $collapsed;
            }
        }

        return $str;
    }
4609
4610
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                              Default: empty string (strip every tag)
     *                              </p>
     *
     * @return string the string without the (non-allowed) tags
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4624
4625
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * "\r\n" counts as ONE break, so it is replaced by a single $replacement.
     * The tag part matches case-insensitively from "<br" up to the next ">".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string the string with every break replaced by $replacement
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // NOTE: the pattern must not start with "/" ("#" is the delimiter here); a leading
        // slash would become part of the first alternative and eat a "/" in front of "\r\n".
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4637
4638
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * The replacement is repeated until a pass finds nothing anymore, so sequences that
     * only appear after a removal (e.g. "%%0000") are caught as well.
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded <p>Also remove the url encoded form ("%00" ... "%1F") of the control characters.</p>
     * @param string $replacement <p>The string that is inserted for every removed sequence.</p>
     *
     * @return string the string without invisible characters
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        $patterns = $url_encoded
            ? [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
            ]
            : [];

        $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // NOTE(review): a $replacement that itself matches one of the patterns would never terminate
        $hits = 0;
        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $hits);
        } while ($hits !== 0);

        return $str;
    }
4671
4672
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix is compared byte-wise; an empty $substring never matches.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // explicit comparison with '' instead of a truthiness test, otherwise the prefix "0" is ignored
        if ($substring !== '' && \strpos($str, $substring) === 0) {
            if ($encoding === 'UTF-8') {
                return (string) \mb_substr(
                    $str,
                    (int) \mb_strlen($substring)
                );
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr(
                $str,
                (int) self::strlen($substring, $encoding),
                null,
                $encoding
            );
        }

        return $str;
    }
4703
4704
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix is compared byte-wise; an empty $substring never matches.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // explicit comparison with '' instead of a truthiness test, otherwise the suffix "0" is ignored
        if ($substring !== '' && \substr($str, -\strlen($substring)) === $substring) {
            if ($encoding === 'UTF-8') {
                return (string) \mb_substr(
                    $str,
                    0,
                    (int) \mb_strlen($str) - (int) \mb_strlen($substring)
                );
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr(
                $str,
                0,
                (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
                $encoding
            );
        }

        return $str;
    }
4736
4737
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // the native function is used for the case-sensitive path, the class helper otherwise
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4759
4760
    /**
4761
     * Replaces all occurrences of $search in $str by $replacement.
4762
     *
4763
     * @param string       $str           <p>The input string.</p>
4764
     * @param array        $search        <p>The elements to search for.</p>
4765
     * @param array|string $replacement   <p>The string to replace with.</p>
4766
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4767
     *
4768
     * @return string string after the replacements
4769
     */
4770
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        // Handle the case-insensitive variant first via the class helper.
        if (!$caseSensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        // Case-sensitive: the native function accepts the array of needles
        // and a string or array replacement directly.
        return \str_replace($search, $replacement, $str);
    }
4782
4783
    /**
4784
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4785
     *
4786
     * @param string $str                <p>The input string</p>
4787
     * @param string $replacementChar    <p>The replacement character.</p>
4788
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
4789
     *
4790
     * @return string
4791
     */
4792
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // mb_substitute_character() only accepts a code point (int) or one of
            // the keywords "none" / "long" / "entity"; an arbitrary replacement
            // string is not a valid argument. For a non-empty replacement we
            // therefore substitute invalid sequences with U+FFFD and let the
            // str_replace() below turn those into $replacementChar.
            $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the previous global substitute character
                \mb_substitute_character($save);
            }
        }

        // Replace every remaining replacement character (U+FFFD).
        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacementChar,
                $replacementChar,
            ],
            $str
        );
    }
4829
4830
    /**
4831
     * Strip whitespace or other characters from end of a UTF-8 string.
4832
     *
4833
     * @param string      $str   <p>The string to be trimmed.</p>
4834
     * @param string|null $chars <p>Optional characters to be stripped.</p>
4835
     *
4836
     * @return string the string with unwanted characters stripped from the right
4837
     */
4838
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of truthiness: "0" is a legitimate
        // character list and must not fall back to whitespace trimming.
        if ($chars !== null && $chars !== '') {
            // Escape the characters so they are taken literally inside the
            // character class.
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Without mbstring, fall back to the class's regex helper using "/"
        // as the delimiter (matching the preg_quote() call above).
        return self::regex_replace($str, $pattern, '', '', '/');
    }
4858
4859
    /**
4860
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
4861
     *
4862
     * @psalm-suppress MissingReturnType
4863
     */
4864
    public static function showSupport()
    {
        // Build one "<key> - <value>" row per entry of the support table.
        $rows = '';
        foreach (self::$SUPPORT as $feature => $state) {
            $rows .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        // Emit the rows wrapped in a <pre> element.
        echo '<pre>' . $rows . '</pre>';
    }
4873
4874
    /**
4875
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4876
     *
4877
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
4878
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
4879
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
4880
     *
4881
     * @return string the HTML numbered entity
4882
     */
4883
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        // Nothing to encode.
        if ($char === '') {
            return '';
        }

        // Optionally pass ASCII input through untouched.
        $passThrough = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($passThrough) {
            return $char;
        }

        // Build the numbered entity from the value returned by ord().
        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4899
4900
    /**
4901
     * @param string $str
4902
     * @param int    $tabLength
4903
     *
4904
     * @return string
4905
     */
4906
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // A run of $tabLength spaces is the unit that becomes one tab.
        $indentUnit = \str_repeat(' ', $tabLength);

        return \str_replace($indentUnit, "\t", $str);
    }
4918
4919
    /**
4920
     * alias for "UTF8::str_split()"
4921
     *
4922
     * @param string|string[] $str
4923
     * @param int             $length
4924
     * @param bool            $cleanUtf8
4925
     *
4926
     * @return string[]
4927
     *
4928
     * @see UTF8::str_split()
4929
     */
4930
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        // Pure alias: forward all arguments unchanged to str_split().
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
4937
4938
    /**
4939
     * alias for "UTF8::str_starts_with()"
4940
     *
4941
     * @param string $haystack
4942
     * @param string $needle
4943
     *
4944
     * @return bool
4945
     *
4946
     * @see UTF8::str_starts_with()
4947
     */
4948
    public static function str_begins(string $haystack, string $needle): bool
    {
        // Pure alias: forward to str_starts_with().
        $startsWith = self::str_starts_with($haystack, $needle);

        return $startsWith;
    }
4952
4953
    /**
4954
     * Returns a camelCase version of the string. Trims surrounding spaces,
4955
     * capitalizes letters following digits, spaces, dashes and underscores,
4956
     * and removes spaces, dashes, as well as underscores.
4957
     *
4958
     * @param string      $str                   <p>The input string.</p>
4959
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
4960
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
4961
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
4962
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
4963
     *
4964
     * @return string
4965
     */
4966
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Lower-case the first character of the trimmed input, then drop any
        // leading dashes / underscores.
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // The native mb_strtoupper() is only used when no language-specific
        // handling and no length-preserving mode was requested.
        $nativeCasing = $lang === null && $tryToKeepStringLength === false;

        /**
         * Upper-cases $text via the native function or the class helper.
         *
         * @param string $text
         * @param bool   $clean
         *
         * @return string
         */
        $toUpper = static function (string $text, bool $clean) use ($nativeCasing, $encoding, $lang, $tryToKeepStringLength): string {
            if ($nativeCasing !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // Pass 1: remove each run of dashes / underscores / whitespace and
        // upper-case the character that follows it (if there is one).
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return isset($match[1]) ? $toUpper($match[1], false) : '';
            },
            $str
        );

        // Pass 2: upper-case each run of digits together with the character
        // directly after it (the digits themselves are kept).
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $str
        );
    }
5038
5039
    /**
5040
     * Returns the string with the first letter of each word capitalized,
5041
     * except for when the word is a name which shouldn't be capitalized.
5042
     *
5043
     * @param string $str
5044
     *
5045
     * @return string string with $str capitalized
5046
     */
5047
    public static function str_capitalize_name(string $str): string
    {
        // Collapse whitespace first, then run the helper once with a space
        // and once with a dash as the second argument.
        $collapsed = self::collapse_whitespace($str);
        $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($bySpace, '-');
    }
5057
5058
    /**
5059
     * Returns true if the string contains $needle, false otherwise. By default
5060
     * the comparison is case-sensitive, but can be made insensitive by setting
5061
     * $caseSensitive to false.
5062
     *
5063
     * @param string $haystack      <p>The input string.</p>
5064
     * @param string $needle        <p>Substring to look for.</p>
5065
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5066
     *
5067
     * @return bool whether or not $haystack contains $needle
5068
     */
5069
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // Byte-wise search for the case-sensitive check, multibyte-aware
        // search for the case-insensitive one.
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5080
5081
    /**
5082
     * Returns true if the string contains all $needles, false otherwise. By
5083
     * default the comparison is case-sensitive, but can be made insensitive by
5084
     * setting $caseSensitive to false.
5085
     *
5086
     * @param string $haystack      <p>The input string.</p>
5087
     * @param array  $needles       <p>SubStrings to look for.</p>
5088
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5089
     *
5090
     * @return bool whether or not $haystack contains all $needles
5091
     */
5092
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // An empty needle can never be "contained". The string "0" is a
            // real needle even though PHP treats it as falsy.
            if (!$needle && $needle !== '0') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // One missing needle is enough to fail; otherwise keep checking
            // the remaining needles instead of returning after the first one.
            if ($position === false) {
                return false;
            }
        }

        // Every needle was found.
        return true;
    }
5116
5117
    /**
5118
     * Returns true if the string contains any $needles, false otherwise. By
5119
     * default the comparison is case-sensitive, but can be made insensitive by
5120
     * setting $caseSensitive to false.
5121
     *
5122
     * @param string $haystack      <p>The input string.</p>
5123
     * @param array  $needles       <p>SubStrings to look for.</p>
5124
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5125
     *
5126
     * @return bool
5127
     *              Whether or not $haystack contains any of the $needles
5128
     */
5129
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Skip empty needles. The string "0" is a real needle even though
            // PHP treats it as falsy, so it must not be skipped.
            if (!$needle && $needle !== '0') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // The first hit settles the answer.
            if ($position !== false) {
                return true;
            }
        }

        // No needle was found.
        return false;
    }
5159
5160
    /**
5161
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
5162
     * inserted before uppercase characters (with the exception of the first
5163
     * character of the string), and in place of spaces as well as underscores.
5164
     *
5165
     * @param string $str      <p>The input string.</p>
5166
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5167
     *
5168
     * @return string
5169
     */
5170
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // Dasherizing is delimiting with "-" as the separator.
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5174
5175
    /**
5176
     * Returns a lowercase and trimmed string separated by the given delimiter.
5177
     * Delimiters are inserted before uppercase characters (with the exception
5178
     * of the first character of the string), and in place of spaces, dashes,
5179
     * and underscores. Alpha delimiters are not converted to lowercase.
5180
     *
5181
     * @param string      $str                   <p>The input string.</p>
5182
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
5183
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
5184
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
5185
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
5186
     *                                           tr</p>
5187
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
5188
     *                                           ß</p>
5189
     *
5190
     * @return string
5191
     */
5192
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Use the mb_ereg_* functions when mbstring is available, otherwise PCRE.
        $hasMbString = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // Step 1: put a dash in front of every upper-case letter that is not
        // at a word boundary.
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // Step 2: lower-case the string. The native function is used only for
        // plain UTF-8 without language or length-preserving options.
        if ($lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // Step 3: collapse each run of dashes / underscores / whitespace into
        // the requested delimiter.
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5226
5227
    /**
5228
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
5229
     *
5230
     * @param string $str <p>The input string.</p>
5231
     *
5232
     * @return false|string
5233
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
5234
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
5235
     */
5236
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) binary input: only UTF-32 / UTF-16 are reported, in that order
        //

        if (self::is_binary($str, true) === true) {
            $utf32 = self::is_utf32($str, false);
            if ($utf32 === 1 || $utf32 === 2) {
                return $utf32 === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16 = self::is_utf16($str, false);
            if ($utf16 === 1 || $utf16 === 2) {
                return $utf16 === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) valid UTF-8
        //

        if (self::is_utf8_string($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) ask "mb_detect_encoding()" (strict mode) with a fixed candidate list
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) last resort: "iconv()" round-trip over the known encodings
        //

        if (self::$ENCODINGS === null) {
            // load the encoding list on first use
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);

            // an unchanged round-trip means the input is valid in that encoding
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5348
5349
    /**
5350
     * alias for "UTF8::str_ends_with()"
5351
     *
5352
     * @param string $haystack
5353
     * @param string $needle
5354
     *
5355
     * @return bool
5356
     *
5357
     * @see UTF8::str_ends_with()
5358
     */
5359
    public static function str_ends(string $haystack, string $needle): bool
    {
        // Pure alias: forward to str_ends_with().
        $endsWith = self::str_ends_with($haystack, $needle);

        return $endsWith;
    }
5363
5364
    /**
5365
     * Check if the string ends with the given substring.
5366
     *
5367
     * @param string $haystack <p>The string to search in.</p>
5368
     * @param string $needle   <p>The substring to search for.</p>
5369
     *
5370
     * @return bool
5371
     */
5372
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // Every string ends with the empty string.
        if ($needle === '') {
            return true;
        }

        // An empty haystack cannot end with a non-empty needle; otherwise
        // compare the trailing bytes of the haystack with the needle.
        return $haystack !== ''
            && \substr($haystack, -\strlen($needle)) === $needle;
    }
5384
5385
    /**
5386
     * Returns true if the string ends with any of $substrings, false otherwise.
5387
     *
5388
     * - case-sensitive
5389
     *
5390
     * @param string   $str        <p>The input string.</p>
5391
     * @param string[] $substrings <p>Substrings to look for.</p>
5392
     *
5393
     * @return bool whether or not $str ends with $substring
5394
     */
5395
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // An empty list yields no iteration and therefore false.
        foreach ($substrings as $candidate) {
            // Byte-wise, case-sensitive comparison of the trailing part.
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5409
5410
    /**
5411
     * Ensures that the string begins with $substring. If it doesn't, it's
5412
     * prepended.
5413
     *
5414
     * @param string $str       <p>The input string.</p>
5415
     * @param string $substring <p>The substring to add if not present.</p>
5416
     *
5417
     * @return string
5418
     */
5419
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Already prefixed (byte-wise comparison of the leading bytes)?
        $alreadyPrefixed = $substring !== ''
            && \strncmp($str, $substring, \strlen($substring)) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5431
5432
    /**
5433
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
5434
     *
5435
     * @param string $str       <p>The input string.</p>
5436
     * @param string $substring <p>The substring to add if not present.</p>
5437
     *
5438
     * @return string
5439
     */
5440
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Only when both strings are non-empty and $str already ends with
        // $substring (byte-wise) is the input returned untouched.
        $alreadySuffixed = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        if ($alreadySuffixed) {
            return $str;
        }

        return $str . $substring;
    }
5454
5455
    /**
5456
     * Capitalizes the first word of the string, replaces underscores with
5457
     * spaces, and strips '_id'.
5458
     *
5459
     * @param string $str
5460
     *
5461
     * @return string
5462
     */
5463
    public static function str_humanize($str): string
    {
        // Order matters: strip "_id" first, then turn the remaining
        // underscores into spaces.
        $withoutId = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $withoutId);

        // Trim and upper-case the first character.
        return self::ucfirst(\trim($spaced));
    }
5479
5480
    /**
5481
     * alias for "UTF8::str_istarts_with()"
5482
     *
5483
     * @param string $haystack
5484
     * @param string $needle
5485
     *
5486
     * @return bool
5487
     *
5488
     * @see UTF8::str_istarts_with()
5489
     */
5490
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // Pure alias: forward to str_istarts_with().
        $startsWith = self::str_istarts_with($haystack, $needle);

        return $startsWith;
    }
5494
5495
    /**
5496
     * alias for "UTF8::str_iends_with()"
5497
     *
5498
     * @param string $haystack
5499
     * @param string $needle
5500
     *
5501
     * @return bool
5502
     *
5503
     * @see UTF8::str_iends_with()
5504
     */
5505
    public static function str_iends(string $haystack, string $needle): bool
    {
        // Pure alias: forward to str_iends_with().
        $endsWith = self::str_iends_with($haystack, $needle);

        return $endsWith;
    }
5509
5510
    /**
5511
     * Check if the string ends with the given substring, case insensitive.
5512
     *
5513
     * @param string $haystack <p>The string to search in.</p>
5514
     * @param string $needle   <p>The substring to search for.</p>
5515
     *
5516
     * @return bool
5517
     */
5518
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // Every string ends with the empty string.
        if ($needle === '') {
            return true;
        }

        // An empty haystack cannot end with a non-empty needle.
        if ($haystack === '') {
            return false;
        }

        // Take as many trailing bytes as the needle has and compare them
        // with the class's strcasecmp().
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5530
5531
    /**
5532
     * Returns true if the string ends with any of $substrings, false otherwise.
5533
     *
5534
     * - case-insensitive
5535
     *
5536
     * @param string   $str        <p>The input string.</p>
5537
     * @param string[] $substrings <p>Substrings to look for.</p>
5538
     *
5539
     * @return bool whether or not $str ends with $substring
5540
     */
5541
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // An empty list yields no iteration and therefore false.
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5555
5556
    /**
5557
     * Returns the index of the first occurrence of $needle in the string,
5558
     * and false if not found. Accepts an optional offset from which to begin
5559
     * the search.
5560
     *
5561
     * @param string $str      <p>The input string.</p>
5562
     * @param string $needle   <p>Substring to look for.</p>
5563
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5564
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5565
     *
5566
     * @return false|int
5567
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5568
     */
5569
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Thin wrapper around the class's stripos().
        return self::stripos($str, $needle, $offset, $encoding);
    }
5582
5583
    /**
5584
     * Returns the index of the last occurrence of $needle in the string,
5585
     * and false if not found. Accepts an optional offset from which to begin
5586
     * the search. Offsets may be negative to count from the last character
5587
     * in the string.
5588
     *
5589
     * @param string $str      <p>The input string.</p>
5590
     * @param string $needle   <p>Substring to look for.</p>
5591
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5592
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5593
     *
5594
     * @return false|int
5595
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5596
     */
5597
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Thin wrapper around the class's strripos().
        return self::strripos($str, $needle, $offset, $encoding);
    }
5610
5611
    /**
5612
     * Returns the index of the first occurrence of $needle in the string,
5613
     * and false if not found. Accepts an optional offset from which to begin
5614
     * the search.
5615
     *
5616
     * @param string $str      <p>The input string.</p>
5617
     * @param string $needle   <p>Substring to look for.</p>
5618
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5619
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5620
     *
5621
     * @return false|int
5622
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5623
     */
5624
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Thin wrapper around the class's strpos().
        return self::strpos($str, $needle, $offset, $encoding);
    }
5637
5638
    /**
     * Finds the position of the last occurrence of $needle inside $str.
     *
     * This is an alias: all arguments are forwarded unchanged to
     * <strong>UTF8::strrpos()</strong> and its result is returned as-is.
     *
     * @param string $str      <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param int    $offset   [optional] <p>Position at which the search starts. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The <strong>index</strong> of the last match, or <strong>false</strong> if there is none
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5665
5666
    /**
     * Splices $substring into $str in front of the character at $index.
     *
     * The input string is returned untouched when $index is greater than
     * the character length of $str.
     *
     * @param string $str       <p>The string that receives the insertion.</p>
     * @param string $substring <p>The text to insert.</p>
     * @param int    $index     <p>Character position at which $substring is placed.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // The fast path is chosen on the raw argument, before any normalization.
        $useNativeMb = $encoding === 'UTF-8';

        if ($useNativeMb) {
            $charCount = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $charCount = (int) self::strlen($str, $encoding);
        }

        // Nothing to do when the insertion point lies behind the end of the string.
        if ($index > $charCount) {
            return $str;
        }

        if ($useNativeMb) {
            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $charCount);
        } else {
            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $charCount, $encoding);
        }

        return $head . $substring . $tail;
    }
5705
5706
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every needle is quoted and matched literally (pattern flags "u" and "i");
     * every replacement is inserted literally as well, i.e. sequences such as
     * "$1" or "\1" are NOT interpreted as regex back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       A needle or an array of needles. With an array, every
     *                       replacement is performed on the result of the previous
     *                       replacement. An empty needle never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       A replacement or an array of replacements (paired with
     *                       the needles by position).
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Turn every needle into a literal, case-insensitive, UTF-8 aware pattern.
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/' // can never match, so an empty needle replaces nothing
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // preg_replace() treats "$n" / "\n" in the replacement as back-references;
        // escape "\" and "$" so the replacement text is inserted verbatim.
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replace)
            ? \array_map($escape, $replace)
            : $escape($replace);

        $replaced = 0;
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $replaced);
        $count = $replaced; // used as reference parameter

        return $subject;
    }
5750
5751
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * The comparison is case-insensitive and works on UTF-8 characters, so
     * e.g. "ÖL" is recognized as a match for "öl".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string the string after the replacement, or $str unchanged if it does not start with $search
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            // NOTE(review): an empty $search appends the replacement; kept as-is
            // for backward compatibility with existing callers.
            return $str . $replacement;
        }

        // Only the leading characters can match, so compare against the prefix
        // instead of scanning the whole string.
        $searchLength = (int) \mb_strlen($search, 'UTF-8');
        $prefix = (string) \mb_substr($str, 0, $searchLength, 'UTF-8');

        if (\mb_stripos($prefix, $search, 0, 'UTF-8') === 0) {
            $strLength = (int) \mb_strlen($str, 'UTF-8');

            return $replacement . \mb_substr($str, $searchLength, $strLength, 'UTF-8');
        }

        return $str;
    }
5782
5783
    /**
     * Replaces $search at the end of the string with $replacement.
     *
     * The comparison is case-insensitive and works on UTF-8 characters. A
     * $search that is longer than $str can never match and leaves $str unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string the string after the replacement, or $str unchanged if it does not end with $search
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $strLength = (int) \mb_strlen($str, 'UTF-8');
        $searchLength = (int) \mb_strlen($search, 'UTF-8');

        // Guard against an out-of-range offset: a longer needle cannot be a suffix.
        if ($searchLength > $strLength) {
            return $str;
        }

        // Compare only the trailing characters that could form the match.
        $keepLength = $strLength - $searchLength;
        $suffix = (string) \mb_substr($str, $keepLength, $searchLength, 'UTF-8');

        if (\mb_stripos($suffix, $search, 0, 'UTF-8') === 0) {
            return \mb_substr($str, 0, $keepLength, 'UTF-8') . $replacement;
        }

        return $str;
    }
5814
5815
    /**
     * Tells whether $haystack begins with $needle, ignoring case.
     *
     * An empty $needle always counts as a match; an empty $haystack only
     * matches an empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // The short-circuit order mirrors the guards: needle first, then haystack.
        return $needle === ''
               || (
                   $haystack !== ''
                   && self::stripos($haystack, $needle) === 0
               );
    }
5835
5836
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for; each entry is cast to string.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: nothing is modified, so no reference is needed
        // (and none is left dangling after the loop).
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, (string) $substring)) {
                return true;
            }
        }

        return false;
    }
5864
5865
    /**
     * Gets the substring after the first occurrence of a separator
     * (the separator is matched case-insensitively).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text behind the separator, or an empty string if
     *                $str or $separator is empty or the separator is not found
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, so the
        // offset and the substring refer to the same character positions.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5899
5900
    /**
     * Gets the substring after the last occurrence of a separator
     * (the separator is matched case-insensitively).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text behind the separator, or an empty string if
     *                $str or $separator is empty or the separator is not found
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, so the
        // offset and the substring refer to the same character positions.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5934
5935
    /**
     * Gets the substring before the first occurrence of a separator
     * (the separator is matched case-insensitively).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text in front of the separator, or an empty string if
     *                $str or $separator is empty or the separator is not found
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, so the
        // offset and the substring refer to the same character positions.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5961
5962
    /**
     * Returns everything in front of the last (case-insensitive) occurrence
     * of $separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the leading part of $str, or an empty string if $str or
     *                $separator is empty or the separator does not occur
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $useNativeMb = $encoding === 'UTF-8';

        // Locate the last match, via mbstring directly for plain UTF-8.
        $position = $useNativeMb
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        return $useNativeMb
            ? (string) \mb_substr($str, 0, $position)
            : (string) self::substr($str, 0, $position, $encoding);
    }
5993
5994
    /**
     * Returns the part of $str selected by <strong>UTF8::stristr()</strong>
     * for the first occurrence of $needle, normalizing "not found" to an
     * empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Passed through to stristr(). Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the matched part, or an empty string if $str or $needle
     *                is empty or stristr() reports no match
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $match = self::stristr($str, $needle, $beforeNeedle, $encoding);

        // stristr() signals "no match" with false; callers of this method get ''.
        return $match === false ? '' : $match;
    }
6030
6031
    /**
     * Returns the part of $str selected by <strong>UTF8::strrichr()</strong>
     * for the last occurrence of $needle, normalizing "not found" to an
     * empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Passed through to strrichr(). Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the matched part, or an empty string if $str or $needle
     *                is empty or strrichr() reports no match
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $match = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        // strrichr() signals "no match" with false; callers of this method get ''.
        return $match === false ? '' : $match;
    }
6062
6063
    /**
     * Extracts the trailing $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the end. Values <= 0 yield an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalized = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized);
        }

        // Plain UTF-8: a negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
6086
6087
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged. Otherwise
     * the string is cut so that the kept part plus $strAddOn is $length
     * characters long. If $strAddOn alone is at least $length characters long,
     * nothing of $str is kept and only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100. Values <= 0 yield an empty string.</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length to mb_substr(): that would cut from
            // the END of the string instead of limiting it.
            $keepLength = \max(0, $length - (int) \mb_strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keepLength)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string itself.
        $keepLength = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        return ((string) self::substr($str, 0, $keepLength, $encoding)) . $strAddOn;
    }
6124
6125
    /**
     * Shortens a string to at most $length characters without ending in the
     * middle of a word, then appends $strAddOn.
     *
     * - A string that already fits into $length is returned unchanged.
     * - If the character at position $length - 1 is a space, the string is cut
     *   right in front of that space.
     * - Otherwise the first $length characters are taken and everything after
     *   their last space is dropped; if no space is present, the first
     *   $length - 1 characters are kept.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100. Values <= 0 yield an empty string.</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $useNativeMb = $encoding === 'UTF-8';

        $charCount = $useNativeMb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($charCount <= $length) {
            return $str;
        }

        // A space exactly at the cut position: cut in front of it and stop.
        $boundaryChar = $useNativeMb
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);
        if ($boundaryChar === ' ') {
            $head = $useNativeMb
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $strAddOn;
        }

        $str = $useNativeMb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if (!$useNativeMb && $str === false) {
            return '' . $strAddOn;
        }

        // Drop the (possibly incomplete) last word of the truncated text.
        $words = \explode(' ', $str);
        \array_pop($words);
        $withoutLastWord = \implode(' ', $words);

        if ($withoutLastWord !== '') {
            return $withoutLastWord . $strAddOn;
        }

        // No space inside the truncated text: fall back to a hard cut.
        $hardCut = $useNativeMb
            ? \mb_substr($str, 0, $length - 1)
            : self::substr($str, 0, $length - 1, $encoding);

        return ((string) $hardCut) . $strAddOn;
    }
6189
6190
    /**
     * Determines the longest run of leading characters that $str and
     * $otherStr have in common.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shared prefix, or an empty string if there is none
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Only positions present in both strings can be part of the prefix.
            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            $pos = 0;
            while ($pos < $limit) {
                $current = self::substr($str, $pos, 1, $encoding);

                if (
                    $current === false
                    ||
                    $current !== self::substr($otherStr, $pos, 1, $encoding)
                ) {
                    break;
                }

                $prefix .= $current;
                ++$pos;
            }

            return $prefix;
        }

        // Only positions present in both strings can be part of the prefix.
        $limit = (int) \min(
            \mb_strlen($str),
            \mb_strlen($otherStr)
        );

        $pos = 0;
        while ($pos < $limit) {
            $current = \mb_substr($str, $pos, 1);

            if (
                $current === false
                ||
                $current !== \mb_substr($otherStr, $pos, 1)
            ) {
                break;
            }

            $prefix .= $current;
            ++$pos;
        }

        return $prefix;
    }
6248
6249
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first in $str.
     *
     * Implemented with the classic dynamic-programming approach; only the
     * previous and the current row of the table are kept in memory.
     *
     * @see http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest common substring, or an empty string if there is none
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Evaluated after normalization on purpose: an alias that normalizes
        // to "UTF-8" uses the native mbstring functions from here on.
        $useNativeMb = $encoding === 'UTF-8';

        // Extract the characters of $otherStr once instead of once per table cell.
        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[$j] = $useNativeMb
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // position in $str just behind that match
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $useNativeMb
                ? \mb_substr($str, $i - 1, 1)
                : self::substr($str, $i - 1, 1, $encoding);

            $currentRow = [0];
            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    // Extend the common run that ended at the previous characters.
                    $run = $previousRow[$j - 1] + 1;

                    // Strictly greater: on ties the earlier match wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($useNativeMb) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6333
6334
    /**
     * Determines the longest run of trailing characters that $str and
     * $otherStr have in common.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shared suffix, or an empty string if there is none
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($otherStr === '' || $str === '') {
            return '';
        }

        $suffix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            // Walk backwards: a negative start addresses characters from the end.
            $distance = 1;
            while ($distance <= $limit) {
                $current = self::substr($str, -$distance, 1, $encoding);

                if (
                    $current === false
                    ||
                    $current !== self::substr($otherStr, -$distance, 1, $encoding)
                ) {
                    break;
                }

                $suffix = $current . $suffix;
                ++$distance;
            }

            return $suffix;
        }

        $limit = (int) \min(
            \mb_strlen($str, $encoding),
            \mb_strlen($otherStr, $encoding)
        );

        // Walk backwards: a negative start addresses characters from the end.
        $distance = 1;
        while ($distance <= $limit) {
            $current = \mb_substr($str, -$distance, 1);

            if (
                $current === false
                ||
                $current !== \mb_substr($otherStr, -$distance, 1)
            ) {
                break;
            }

            $suffix = $current . $suffix;
            ++$distance;
        }

        return $suffix;
    }
6395
6396
    /**
     * Tests $str against a regular expression.
     *
     * The pattern is given WITHOUT delimiters and modifiers: it is wrapped
     * in "/" delimiters and evaluated with the "u" (UTF-8) modifier, so a
     * literal "/" inside $pattern has to be escaped by the caller.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match("/{$pattern}/u", $str) === 1;
    }
6408
6409
    /**
     * Checks whether a character exists at the given index. A negative
     * $offset is counted from the last character of the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // Non-negative indexes run from 0 to length - 1,
        // negative ones from -1 down to -length.
        return $offset >= 0
            ? $charCount > $offset
            : $charCount >= \abs($offset);
    }
6431
6432
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the same encoding that char_at() uses below,
        // otherwise the bounds check and the lookup can disagree.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6461
6462
    /**
     * Pad a string to a given length (counted in characters, not bytes) with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string is empty,
     * or when $str already has at least $pad_length characters.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is not an integer and not one of 'left', 'right' or 'both'
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // the UTF-8 fast path talks to mbstring directly, every other encoding
        // goes through the wrappers of this class
        $isUtf8 = ($encoding === 'UTF-8');
        if ($isUtf8) {
            $str_length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        $diff = $pad_length - $str_length;
        if ($diff <= 0) {
            // already long enough: nothing to add
            return $str;
        }

        $ps_length = $isUtf8
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);
        if ($ps_length < 1) {
            // guard the division below in case the length could not be determined
            $ps_length = 1;
        }

        /**
         * Build exactly $length characters of padding by repeating $pad_string
         * only as often as needed and cutting off the surplus.
         *
         * @param int $length
         *
         * @return string
         */
        $buildPadding = static function (int $length) use ($pad_string, $ps_length, $isUtf8, $encoding): string {
            if ($length <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($length / $ps_length));

            if ($isUtf8) {
                return (string) \mb_substr($repeated, 0, $length);
            }

            return (string) self::substr($repeated, 0, $length, $encoding);
        };

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                return $buildPadding($diff) . $str;

            case \STR_PAD_BOTH:
                // an odd remainder goes to the right-hand side
                $ps_length_left = (int) \floor($diff / 2);
                $ps_length_right = (int) \ceil($diff / 2);

                return $buildPadding($ps_length_left) . $str . $buildPadding($ps_length_right);

            case \STR_PAD_RIGHT:
            default:
                return $str . $buildPadding($diff);
        }
    }
6624
6625
    /**
     * Pads both sides of the string until it reaches the given length.
     *
     * Thin wrapper: delegates to str_pad() with a pad type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6644
6645
    /**
     * Pads the beginning of the string until it reaches the given length.
     *
     * Thin wrapper: delegates to str_pad() with a pad type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6664
6665
    /**
     * Pads the end of the string until it reaches the given length.
     *
     * Thin wrapper: delegates to str_pad() with a pad type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6684
6685
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is handed to the
     * native str_repeat().
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        $filtered = self::filter($str);

        return \str_repeat($filtered, $multiplier);
    }
6709
6710
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6755
6756
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is done byte-wise and is case-sensitive. If $str does not
     * start with $search, $str is returned unchanged.
     *
     * INFO: An empty $search appends $replacement to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers the empty $str case: '' . $replacement === $replacement
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // compare only the prefix instead of scanning the whole string for $search
        if (\strncmp($str, $search, $searchLength) === 0) {
            // the cast keeps the result a string when nothing is left after the prefix
            return $replacement . (string) \substr($str, $searchLength);
        }

        return $str;
    }
6787
6788
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is done byte-wise and is case-sensitive. If $str does not
     * end with $search (including the case where $search is longer than $str),
     * $str is returned unchanged.
     *
     * INFO: An empty $search appends $replacement to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers the empty $str case: '' . $replacement === $replacement
            return $str . $replacement;
        }

        $strLength = \strlen($str);
        $searchLength = \strlen($search);

        // the length check comes first, so the negative offset below can never
        // point outside of $str
        if (
            $searchLength <= $strLength
            &&
            \substr_compare($str, $search, -$searchLength) === 0
        ) {
            return (string) \substr($str, 0, $strLength - $searchLength) . $replacement;
        }

        return $str;
    }
6819
6820
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The position is looked up with self::strpos(); when nothing is found the
     * subject is returned as it is.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the first match.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);

        if ($firstPos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $firstPos,
            (int) self::strlen($search)
        );
    }
6849
6850
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The position is looked up with self::strrpos(); when nothing is found the
     * subject is returned as it is.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the last match.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $lastPos = self::strrpos($subject, $search);

        if ($lastPos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $lastPos,
            (int) self::strlen($search)
        );
    }
6881
6882
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * An empty string is returned as it is. The character positions are shuffled
     * with the native shuffle() and the result is rebuilt character by character.
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $isUtf8 = ($encoding === 'UTF-8');
        if ($isUtf8) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        if ($length < 1) {
            // range(0, -1) would yield the bogus indexes [0, -1]
            return '';
        }

        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // init
        $shuffledStr = '';

        foreach ($indexes as $i) {
            $tmpSubStr = $isUtf8
                ? \mb_substr($str, $i, 1)
                : self::substr($str, $i, 1, $encoding);

            if ($tmpSubStr !== false) {
                $shuffledStr .= $tmpSubStr;
            }
        }

        return $shuffledStr;
    }
6928
6929
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $start or $end is negative, it is computed from
     * the end of the string.
     *
     * An empty string is returned when the resolved $end does not lie behind
     * the resolved $start.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring; the value comes straight from
     *                      mb_substr() (UTF-8) or self::substr() (other encodings),
     *                      so <b>FALSE</b> is possible if that call fails.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $isUtf8 = ($encoding === 'UTF-8');
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end === null) {
            // no end index: take everything from $start onwards
            if ($isUtf8) {
                return \mb_substr($str, $start, (int) \mb_strlen($str));
            }

            return self::substr($str, $start, (int) self::strlen($str, $encoding), $encoding);
        }

        $from = $start;
        $to = $end;

        // resolve negative indexes against the string length, so that both
        // borders are absolute before the length is computed
        if ($from < 0 || $to < 0) {
            $strLength = $isUtf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            if ($from < 0) {
                $from = (int) \max(0, $strLength + $from);
            }

            if ($to < 0) {
                $to = $strLength + $to;
            }
        }

        if ($to <= $from) {
            return '';
        }

        if ($isUtf8) {
            return \mb_substr($str, $from, $to - $from);
        }

        return self::substr($str, $from, $to - $from, $encoding);
    }
6978
6979
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every number character
     * and every upper-case letter gets a leading "_" (ASCII digits are wrapped in
     * "_" on both sides, letters are lower-cased); whitespace runs become "_";
     * repeated "_" are collapsed and leading / trailing "_" are trimmed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // no "|" inside the character class: there it would be a literal pipe
            '/([\p{N}\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // ASCII digits survive the int round-trip and get wrapped in "_"
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7043
7044
    /**
     * Sort all characters according to code points.
     *
     * The string is turned into code points via self::codepoints(), sorted and
     * converted back via self::string().
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses duplicated values
            $flipped = \array_flip($codePoints);
            $codePoints = \array_flip($flipped);
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7069
7070
    /**
     * Convert a string to an array of Unicode characters.
     *
     * Arrays are processed recursively: every entry is split on its own and the
     * keys of the input array are kept.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; an empty array if
     *               $length is smaller than 1 or the input is an empty string.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // write back by key instead of iterating by reference, so no
            // dangling reference is left inside the returned array
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: one mb_substr() call per character
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: let PCRE split the string in one go
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {

            // fallback: decode the UTF-8 sequences by hand, looking at the
            // lead byte to find out how many continuation bytes belong to it

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1 byte: 0xxxxxxx
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue $length characters together per element
            $ret = \array_chunk($ret, $length);

            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7209
7210
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * With mbstring support the pattern is handed to mb_split(); otherwise a
     * preg_split() based fallback is used.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            if ($parts === false) {
                // mb_split() may fail; keep the declared array return type
                return [];
            }

            if ($limit > 0) {
                // mb_split() would put the unsplit rest into the last element
                // when a limit is passed to it, so the result is cut here
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // preg_split() puts the unsplit rest into the last element too, so one
        // extra element is requested and dropped again below
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): preg_quote() makes this fallback split on the literal text
        // of $pattern, while mb_split() above treats it as a regex - confirm intent.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7271
7272
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is done byte-wise and is case-sensitive. An empty $needle
     * always matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // compare only the prefix instead of scanning the whole haystack for
        // the needle; an empty or too short haystack simply does not match
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7292
7293
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * An empty input string or an empty list of substrings never matches.
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7321
7322
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty, or
     * if the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // continue right behind the separator
            $from = $position + (int) self::strlen($separator, $encoding);

            return (string) \mb_substr($str, $from, null, $encoding);
        }

        $position = \mb_strpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // continue right behind the separator
        $from = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $from);
    }
7361
7362
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // locate the last occurrence (native mbstring for UTF-8, wrapper otherwise)
        $lastPos = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($lastPos === false) {
            return '';
        }

        if ($isUtf8) {
            $start = $lastPos + (int) \mb_strlen($separator);

            return (string) \mb_substr($str, $start);
        }

        $start = $lastPos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
7401
7402
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // locate the first occurrence (native mbstring for UTF-8, wrapper otherwise)
        $firstPos = $isUtf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($firstPos === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $firstPos);
        }

        return (string) self::substr($str, 0, $firstPos, $encoding);
    }
7445
7446
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $lastPos = \mb_strrpos($str, $separator);

            return $lastPos === false
                ? ''
                : (string) \mb_substr($str, 0, $lastPos);
        }

        $lastPos = self::strrpos($str, $separator, 0, $encoding);
        if ($lastPos === false) {
            return '';
        }

        // the encoding name is normalized only after the search, right before slicing
        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $lastPos, $normalizedEncoding);
    }
7488
7489
    /**
     * Gets the part of the string starting at (or, with "$beforeNeedle", ending before)
     * the first occurrence of the "$needle".
     *
     * An empty string is returned if $str or $needle is empty, or if the
     * needle cannot be found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // native mbstring for UTF-8, the class wrapper for everything else
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7533
7534
    /**
     * Gets the part of the string starting at (or, with "$beforeNeedle", ending before)
     * the last occurrence of the "$needle".
     *
     * An empty string is returned if $str or $needle is empty, or if the
     * needle cannot be found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // native mbstring for UTF-8, the class wrapper for everything else
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7578
7579
    /**
     * Wraps $str in the given substring (prefix and suffix are the same value).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7591
7592
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that is neither whitespace nor one
     * of the chars from $word_define_chars.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     * @param string|null         $word_define_chars     [optional] <p>A string of chars that will be used as
     *                                                   whitespace separator === words.</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true,
        string $word_define_chars = null
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain mb_* functions are sufficient unless language- or
        // length-specific case handling was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        // Compare explicitly: a truthiness check would silently drop the
        // separator "0" because the string "0" is falsy in PHP.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
                // words from the ignore-list are kept exactly as they are
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($useMbFunction === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $tryToKeepStringLength
                    ),
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            },
            $str
        );

        return $str;
    }
7679
7680
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * The input is processed by five regex passes, in this order:
     * 1. main substitution (capitalize words, lower-case "small words",
     *    keep URLs / paths / words with internal caps untouched)
     * 2. capitalize a small word at the start of the title / sub-sentence / sub-phrase
     * 3. capitalize a small word at the end of the title / sub-phrase
     * 4. capitalize a small word that starts a hyphenated compound ("in-flight")
     * 5. capitalize a small word that ends a two-part hyphenated compound ("stand-in")
     *
     * INFO: the entries of $ignore are joined into the regex alternation
     *       without quoting, so they are interpreted as regex fragments
     *       (just like the built-in entries, e.g. 'v[.]?').
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        // built-in "small words" (regex fragments) + the caller supplied ones
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        // optional apostrophe followed by lower-case letters (e.g. "'s")
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case char is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * Exactly one of the groups 2-5 is expected to be non-empty per match;
             * the first non-empty one decides how the word is rewritten.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= static::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * Keeps the leading context ($matches[1]) and upper-cases the first
             * char of the small word ($matches[2]).
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * Upper-cases the first char of the matched small word.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * Upper-cases the first char of the matched small word.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        //
        // NOTE(review): the lookbehind in the pattern below tests for "…" while
        // its inline comment talks about a hyphen -> confirm which one is intended.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * Keeps the first word + hyphen ($matches[1]) and upper-cases the
             * first char of the small word ($matches[2]).
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7852
7853
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big-endian number and written
     * in base 2 without leading zeros (e.g. "a" => "1100001", "" => "0").
     *
     * INFO: The conversion is done digit by digit, because base_convert()
     *       loses precision as soon as the input is longer than a few bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        /** @noinspection OffsetOperationsInspection */
        $hex = (string) $value[1];
        if ($hex === '') {
            // same result as base_convert('', 16, 2)
            return '0';
        }

        // every hex digit maps to exactly four bits
        $bits = '';
        foreach (\str_split($hex) as $hexDigit) {
            $bits .= \sprintf('%04b', \hexdec($hexDigit));
        }

        // drop the leading zeros, like base_convert() does
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7871
7872
    /**
     * Splits a string into lines; every run of one or two "\r" / "\n" chars
     * is treated as one line separator.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "nothing to split" (empty input or a failed split)
        $fallback = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        // no filter requested -> hand back the raw split result
        $filterRequested = $removeEmptyValues !== false || $removeShortValues !== null;
        if (!$filterRequested) {
            return $lines;
        }

        return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
    }
7910
7911
    /**
     * Convert a string into an array of words.
     *
     * INFO: The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result
     *       contains the words as well as the pieces between them, unless
     *       they are filtered out via $removeEmptyValues / $removeShortValues.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        $charList = self::rxClass($charList, '\pL');

        $return = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        if (
            $removeShortValues === null
            &&
            $removeEmptyValues === false
        ) {
            return $return;
        }

        $tmpReturn = self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );

        // Cast every entry to string without a by-reference loop, so that no
        // dangling reference is left in the returned array (keys are preserved).
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $tmpReturn
        );
    }
7958
7959
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown <p>Forwarded as second argument to "UTF8::to_ascii()". Default: '?'</p>
     * @param bool   $strict  <p>Forwarded as third argument to "UTF8::to_ascii()". Default: false</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        return self::to_ascii($str, $unknown, $strict);
    }
7974
7975
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: If nothing of $str fits next to $substring (or $length is not
     *       positive), only $substring is returned, like "str_truncate_safe()" does.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough already -> nothing to do
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the appended substring
            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // A negative length would make mb_substr() count from the end of
            // the string and return far more than the requested length.
            if ($length <= 0) {
                return $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // short enough already -> nothing to do
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the appended substring
        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same guard as in the UTF-8 branch
        if ($length <= 0) {
            return $substring;
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
8032
8033
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: Only $substring is returned if $str is empty, if $length is not
     *       positive, or if no room is left after reserving space for $substring.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>true === keep a cut-off word when the truncated
     *                                                part contains no space at all. Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        // fast path: plain mbstring calls for the default encoding
        if ($encoding === 'UTF-8') {
            // short enough already -> nothing to do
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // need to further trim the string so we can append the substring
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            $truncated = \mb_substr($str, 0, $length);

            if ($truncated === false) {
                return '';
            }

            // if the last word was truncated
            // (the first space at or after the last kept char is not located directly behind the cut)
            $strPosSpace = \mb_strpos($str, ' ', $length - 1);
            if ($strPosSpace !== $length) {
                // find pos of the last occurrence of a space, get up to that
                $lastPos = \mb_strrpos($truncated, ' ', 0);

                // Without any space in $truncated, $lastPos is false and "(int) $lastPos" is 0,
                // so the cut-off word is dropped completely, unless the caller opted out
                // via $ignoreDoNotSplitWordsForOneWord.
                if (
                    $lastPos !== false
                    ||
                    ($strPosSpace !== false && $ignoreDoNotSplitWordsForOneWord === false)
                ) {
                    $truncated = (string) \mb_substr($truncated, 0, (int) $lastPos);
                }
            }
        } else {
            // same algorithm as above, but via the encoding aware class wrappers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            // need to further trim the string so we can append the substring
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $truncated = self::substr($str, 0, $length, $encoding);

            if ($truncated === false) {
                return '';
            }

            // if the last word was truncated
            $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
            if ($strPosSpace !== $length) {
                // find pos of the last occurrence of a space, get up to that
                $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

                // see the UTF-8 branch: "(int) false" === 0 drops the cut-off word
                if (
                    $lastPos !== false
                    ||
                    ($strPosSpace !== false && $ignoreDoNotSplitWordsForOneWord === false)
                ) {
                    $truncated = (string) self::substr($truncated, 0, (int) $lastPos, $encoding);
                }
            }
        }

        return $truncated . $substring;
    }
8127
8128
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: This is a thin wrapper, the work is done by "UTF8::str_delimit()"
     *       which is called with "_" as delimiter.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the underscored string
     *
     * @see UTF8::str_delimit()
     */
    public static function str_underscored(string $str): string
    {
        return self::str_delimit($str, '_');
    }
8142
8143
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized via "UTF8::str_camelize()" first, afterwards the
     * first char is upper-cased via "UTF8::ucfirst()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8165
8166
    /**
     * Alias of "UTF8::ucfirst()": every argument is forwarded unchanged.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Forwarded to UTF8::ucfirst().</p>
     * @param string|null $lang                  [optional] <p>Forwarded to UTF8::ucfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Forwarded to UTF8::ucfirst().</p>
     *
     * @return string <p>Whatever UTF8::ucfirst() returns for the same arguments.</p>
     *
     * @see UTF8::ucfirst()
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        return self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8188
8189
    /**
     * Counts the words of a UTF-8 string, or returns the words themselves.
     *
     * The string is split by UTF8::str_to_words(); this method reads the words
     * from the odd indexes of that result and treats the even indexes as the
     * text between words.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return the number of words (default, also used for any other value)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words, keyed by the character offset of each word
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @return int|string[] <p>The number of words, or the list of words (see $format).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // plain counting mode
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        // list mode: collect every second part, starting with the first word
        if ($format === 1) {
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // offset mode: the first word starts right after the leading non-word part
        $position = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $partCount; $index += 2) {
            $words[$position] = $parts[$index];

            // skip the word itself plus the separator that follows it
            $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
8226
8227
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp() - both strings are
     * case-folded via UTF8::strtocasefold() and then handed to UTF8::strcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8248
8249
    /**
     * Alias of "UTF8::strstr()": every argument is forwarded unchanged.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Forwarded to UTF8::strstr().</p>
     * @param string $encoding      [optional] <p>Forwarded to UTF8::strstr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::strstr().</p>
     *
     * @return false|string <p>Whatever UTF8::strstr() returns for the same arguments.</p>
     *
     * @see UTF8::strstr()
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        return self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
8271
8272
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings short-circuit to 0. Otherwise both strings are brought
     * into Unicode normalization form NFD before the byte-wise comparison, so
     * that canonically equivalent strings (e.g. precomposed vs. decomposed
     * accents) compare as equal.
     *
     * If a string cannot be normalized (\Normalizer::normalize() does not
     * return a string, e.g. for invalid UTF-8), the raw string is compared
     * instead of passing a non-string to \strcmp(), which would raise a
     * TypeError under strict types.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8294
8295
    /**
     * Finds the length of the initial segment that contains none of the
     * characters from $charList.
     *
     * When $offset and / or $length are given, only that slice of $str is
     * examined. An empty $charList yields the length of the whole string.
     *
     * @param string   $str      <p>The string to examine.</p>
     * @param string   $charList <p>The characters that end the initial segment.</p>
     * @param int|null $offset   [optional] <p>Start of the slice to examine (null is treated as 0 when $length is set).</p>
     * @param int|null $length   [optional] <p>Length of the slice to examine.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int <p>The length (in characters) of the leading segment without any char of $charList.</p>
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // nothing can terminate the segment -> the whole string counts
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        $sliceRequested = $offset !== null || $length !== null;
        if ($sliceRequested) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, $start, $length, $encoding);
            } elseif ($length !== null) {
                $slice = \mb_substr($str, $start, $length);
            } else {
                $slice = \mb_substr($str, $start);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the list
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            // no char of the list occurs at all
            return (int) self::strlen($str, $encoding);
        }

        $prefixLength = self::strlen($found[1], $encoding);

        return $prefixLength === false ? 0 : $prefixLength;
    }
8358
8359
    /**
     * Alias of "UTF8::stristr()": every argument is forwarded unchanged.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Forwarded to UTF8::stristr().</p>
     * @param string $encoding      [optional] <p>Forwarded to UTF8::stristr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::stristr().</p>
     *
     * @return false|string <p>Whatever UTF8::stristr() returns for the same arguments.</p>
     *
     * @see UTF8::stristr()
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        return self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
8381
8382
    /**
     * Creates a string from a list of code points.
     *
     * Each element is converted with UTF8::chr() (called with that single
     * argument) and the results are concatenated in array order.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string <p>UTF-8 encoded string.</p>
     */
    public static function string(array $array): string
    {
        $result = '';

        foreach ($array as $codePoint) {
            $result .= self::chr($codePoint);
        }

        return $result;
    }
8404
8405
    /**
     * Checks if a string starts with a "BOM" (Byte Order Mark Character).
     *
     * The candidate byte sequences are the keys of self::$BOM; the byte
     * lengths stored as values are not needed here.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Iterate over the keys only: the former by-reference loop over the
        // values left a dangling reference into the static BOM table.
        foreach (\array_keys(self::$BOM) as $bomString) {
            $bomString = (string) $bomString;

            // An empty key can never be a real BOM, and PHP >= 8 reports a
            // match at offset 0 for an empty needle.
            if ($bomString === '') {
                continue;
            }

            if (\strpos($str, $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8425
8426
    /**
     * Strips HTML and PHP tags from a string, optionally cleaning invalid
     * UTF-8 first.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>The input string.</p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped; null (default) strips all tags.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string <p>The stripped string.</p>
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        // only pass the second argument to the native function when the caller supplied one
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8462
8463
    /**
     * Removes every whitespace character from a string.
     *
     * Matching is done with the POSIX class [[:space:]] in Unicode ("u") mode,
     * so tabs and newlines are removed as well as multibyte whitespace such as
     * the thin space and the ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>The string without whitespace.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8480
8481
    /**
     * Finds the position of the first occurrence of a string within another,
     * case insensitive.
     *
     * Strategy, in order: mbstring (if available), then intl's grapheme
     * functions (UTF-8 and non-negative offset only), then the native
     * byte-wise function for pure ASCII input, and finally a case-folded
     * search via UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     *                   (also for an empty haystack or needle)
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // "grapheme_stripos()" can't handle other encodings or a negative offset
        $graphemeUsable = $encoding === 'UTF-8'
                          &&
                          $offset >= 0
                          &&
                          self::$SUPPORT['intl'] === true;
        if ($graphemeUsable) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // fold both sides to a common case, then search case-sensitively
        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8556
8557
    /**
     * Returns all of haystack starting from and including the first
     * (case-insensitive) occurrence of needle to the end.
     *
     * Strategy, in order: mbstring (if available), then intl's grapheme
     * functions (UTF-8 only), then the native byte-wise function for pure
     * ASCII input, and finally a regex based search.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string <p>A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also for an empty haystack or needle).</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if cleaning removed all of it.
        // Compare strictly: a loose "!$needle" check also treats the valid
        // needle "0" as empty and would return the whole haystack for it.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // lazily capture everything in front of the first occurrence of the needle
        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // the result starts where the captured prefix ends
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8640
8641
    /**
     * Get the string length, not the byte-length!
     *
     * Strategy, in order: mbstring, plain byte length for the 8-bit / ASCII
     * encodings, iconv, intl's grapheme functions (UTF-8 only), plain byte
     * length for pure ASCII input and finally a regex based count.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        // warn when no extension is left that understands the requested encoding
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // one match per character in Unicode mode
        \preg_match_all('/./us', $str, $chars);

        $charCount = \count($chars[0]);

        // a non-empty string without any match is reported as a failure
        return $charCount === 0 ? false : $charCount;
    }
8755
8756
    /**
     * Get the string length in bytes.
     *
     * When self::$SUPPORT reports that mbstring function overloading is
     * active, "mb_strlen()" with the 8-bit "CP850" encoding is used instead
     * of the native "strlen()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int <p>The number of bytes.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it ... (CP850 === 8-BIT)
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
8776
8777
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp() - both strings are
     * case-folded via UTF8::strtocasefold() and then handed to UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8798
8799
    /**
     * String comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp() - identical strings
     * short-circuit to 0, otherwise both strings are passed through
     * UTF8::strtonatfold() before the native comparison.
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
8825
8826
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then handed
     * to UTF8::strncmp(); note that $encoding is used for the folding step
     * only and is not forwarded to UTF8::strncmp().
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8853
8854
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut down to their first $len characters and the two
     * prefixes are then compared with UTF8::strcmp().
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            // other encodings go through the encoding-aware helper
            $prefix1 = (string) self::substr($str1, 0, $len, $encoding);
            $prefix2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $prefix1 = (string) \mb_substr($str1, 0, $len);
            $prefix2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($prefix1, $prefix2);
    }
8889
8890
    /**
     * Search a string for any of a set of characters.
     *
     * The first character of $haystack that is contained in $char_list is
     * located via a character-class regex (built by UTF8::rxClass()); the
     * rest of the haystack from that character on is returned.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string <p>String starting from the character found, or false if it is not found
     *                      (also for an empty haystack or char_list).</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // position of the matched character inside the haystack
        $firstHitPos = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $firstHitPos);
    }
8912
8913
    /**
     * Find position of first occurrence of string in a string.
     *
     * Strategy, in order: mbstring, the native byte-wise function for the
     * 8-bit / ASCII encodings, intl's grapheme functions (UTF-8 and
     * non-negative offset only), iconv (non-negative offset only), the native
     * byte-wise function for pure ASCII input and finally a manual search.
     *
     * The returned position is always relative to the start of $haystack,
     * also for a negative $offset in the manual fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
     *                              A negative offset counts from the end of the string.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false
     *                   (also for an empty haystack or needle).
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        // warn when no extension is left that understands the requested encoding
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Resolve a negative offset to the absolute start position first, so
        // that the result stays relative to the beginning of the full
        // haystack (same contract as "mb_strpos()" and "\strpos()" above).
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystackTmp = (string) $haystackTmp;

        // byte position of the needle inside the remaining part of the haystack
        $pos = \strpos($haystackTmp, $needle);
        if ($pos === false) {
            return false;
        }

        // convert the byte position into a character position and re-add the skipped part
        return $offset + (int) self::strlen(\substr($haystackTmp, 0, $pos), $encoding);
    }
9060
9061
    /**
     * Find the position of the first occurrence of a string in a string (8-bit / byte-wise variant).
     *
     * @param string $haystack <p>
     *                         The string being searched.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to look for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset, forwarded unchanged to the underlying function.
     *                         If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the haystack.
     *                   Returns false if haystack or needle is an empty string, or if needle is not found.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        // otherwise the plain string function is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9090
9091
    /**
     * Returns a portion of haystack relative to the last occurrence of needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.<br>
     *                              INFO: the fallbacks which are used without "mbstring" only search
     *                              for the first character of needle.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return haystack from the beginning up to the last occurrence of needle<br>
     *                              <b>false</b>: return haystack from the last occurrence of needle to the end
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Run haystack and needle through UTF8::clean() before searching.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     *                      (also false if haystack or needle is an empty string)
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1. mbstring (preferred)
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2. binary || ascii only: the native function is enough,
        //    but it is only used when the part *after* the needle is requested
        //

        $isSingleByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
        if ($before_needle === false && $isSingleByteEncoding) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3. iconv or vanilla php: both search for the first character of the needle only
        //    and differ just in how the last position is located
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9213
9214
    /**
     * Reverses the order of the characters in a string.
     *
     * The string is passed through UTF8::emoji_encode() before reversing and the
     * result through UTF8::emoji_decode() afterwards.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            // other encodings: go through the generic helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($idx = (int) self::strlen($str, $encoding) - 1; $idx >= 0; --$idx) {
                $piece = self::substr($str, $idx, 1, $encoding);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \grapheme_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } else {
            // no "intl": walk the string backwards one "mb_" character at a time
            for ($idx = (int) \mb_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \mb_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9266
9267
    /**
     * Returns a portion of haystack relative to the last occurrence of needle, case insensitive.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.<br>
     *                              INFO: the fallback which is used without "mbstring" only searches
     *                              for the first character of needle.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return haystack from the beginning up to the last occurrence of needle<br>
     *                              <b>false</b>: return haystack from the last occurrence of needle to the end
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Run haystack and needle through UTF8::clean() before searching.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     *                      (also false if haystack or needle is an empty string)
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1. mbstring (preferred)
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2. vanilla php: locate the first character of the needle via UTF8::strripos()
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9342
9343
    /**
     * Find the position of the last occurrence of a string in a string, case insensitive.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Run haystack and needle through UTF8::clean() before searching.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1. mbstring (preferred)
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // 2. binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3. intl
        //

        $intlUsable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
                      &&
                      $offset >= 0 // grapheme_strripos() can't handle negative offset
                      &&
                      self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $intlPos = \grapheme_strripos($haystack, $needle, $offset);
            // a "false" from intl is not final, the remaining fallbacks are still tried
            if ($intlPos !== false) {
                return $intlPos;
            }
        }

        //
        // 4. ascii only content
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // 5. vanilla php: case-fold both strings, then do a case sensitive search
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
9453
9454
    /**
     * Find the position of the last occurrence of a string in a string,
     * case insensitive (8-bit / byte-wise variant).
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack to start searching,
     *                         forwarded unchanged to the underlying function.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found
     *                   (also false if haystack or needle is an empty string)
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        // otherwise the plain string function is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9485
9486
    /**
     * Find position of last occurrence of a string in a string.
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native byte functions (for "CP850" / "ASCII"), intl (UTF-8 and
     * non-negative offset only), native byte functions (for pure ASCII content)
     * and finally a vanilla php fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Run haystack and needle through UTF8::clean() before searching.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            // a "false" from intl is not final, the remaining fallbacks are still tried
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //
        // The haystack is cut down to the searched range (character based), then the
        // needle is located byte-wise and the byte position is converted back into a
        // character position by counting the characters in front of the match.
        //
        // INFO: $encoding is forwarded to substr() / strlen() so that this fallback
        //       honors the requested charset, same as the vanilla fallback of strpos().
        //

        $haystackTmp = null;
        if ($offset > 0) {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            // the haystack was cut at the end, so positions still count from the beginning
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9623
9624
    /**
     * Find the position of the last occurrence of a string in a string (8-bit / byte-wise variant).
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string. The value is forwarded unchanged to the
     *                         underlying function.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. Returns false if haystack or needle is an empty string,
     *                   or if needle is not found.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        // otherwise the plain string function is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9654
9655
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>If given (together with / or $length), only this sub-string of
     *                         $str is inspected.</p>
     * @param int    $length   [optional] <p>Length of the inspected sub-string, null means "until the end".</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   The length of the matching initial segment, 0 if the (sub-)string or the mask
     *                   is empty or nothing matches. INFO: this implementation itself never returns false.
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested sub-string first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest run of mask characters anchored at the start of the string
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9698
9699
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Run haystack and needle through UTF8::clean() before searching.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also false if haystack or needle is an empty string)
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1. mbstring (preferred)
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2. binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3. intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $intlResult = \grapheme_strstr($haystack, $needle, $before_needle);
            // a "false" from intl is not final, the remaining fallbacks are still tried
            if ($intlResult !== false) {
                return $intlResult;
            }
        }

        //
        // 4. ascii only content
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5. vanilla php: capture (non-greedy) everything in front of the first needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $prefix = $found[1];
        if ($before_needle) {
            return $prefix;
        }

        // skip as many characters as there are in front of the needle
        return self::substr($haystack, (int) self::strlen($prefix));
    }
9807
9808
    /**
     * Finds first occurrence of a string within another (8-bit / byte-wise variant).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end,
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found
     *                      (also false if haystack or needle is an empty string)
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in / for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        // otherwise the plain string function is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
9843
9844
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Fold to lowercase (default), otherwise fold to uppercase.
     *                               PS: uppercase is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // plain UTF-8 without a language hint can go straight to mbstring
        $plainUtf8 = $encoding === 'UTF-8' && $lang === null;

        if ($lower === true) {
            return $plainUtf8
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $plainUtf8
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
9896
9897
    /**
     * Make a string lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        $hasLang = $lang !== null;

        // fast path: UTF-8 without a language hint
        if (!$hasLang && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($hasLang && self::$SUPPORT['intl'] === true) {
            // the list of available transliterators is loaded on first use
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            $transliteratorId = $lang . '-Lower';
            if (!\in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true)) {
                \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                // unknown language: use the language-neutral transliterator
                $transliteratorId = 'Any-Lower';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            return (string) \transliterator_transliterate($transliteratorId, $str);
        }

        if ($hasLang) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
9966
9967
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of available transliterators is loaded on first use
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, the language simply has no dedicated transliterator
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10036
10037
    /**
     * Translate characters or replace sub-strings.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The characters (string) or items (array) to replace. If <i>to</i> is
     *                              empty and this is an array, it is used as a "search => replace" map.</p>
     * @param string|string[] $to   [optional] <p>The replacements; if empty and <i>from</i> is a string,
     *                              every occurrence of <i>from</i> is removed.</p>
     *
     * @throws \InvalidArgumentException if <i>from</i> and <i>to</i> cannot be combined into a map
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // strings are split into single characters; arrays are taken as ready-made lists
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }
            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // cut the longer list so that both sides have the same number of elements
            $countFrom = \count($from);
            $countTo = \count($to);
            if ($countFrom > $countTo) {
                $from = \array_slice($from, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $to = \array_slice($to, 0, $countFrom);
            }

            // keep $from untouched until the map is known to be valid, so the exception can report it
            $map = \array_combine($from, $to);
            if ($map === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $map;
        }

        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10084
10085
    /**
     * Return the width of a string.
     *
     * INFO: without mbstring, characters matched by the wide-character ranges below count as
     *       two columns and every remaining character as one.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        // preferred: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8 ? \mb_strwidth($str) : \mb_strwidth($str, $encoding);
        }

        // vanilla php: the counting below works on UTF-8 only
        if (!$isUtf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the wide characters and remember how many were removed
        $wideChars = 0;
        $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideChars);

        return ($wideChars * 2) + (int) self::strlen($narrowOnly, 'UTF-8');
    }
10135
10136
    /**
     * Get part of a string.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str       <p>The string being checked.</p>
     * @param int      $offset    <p>The first position used in str.</p>
     * @param int|null $length    [optional] <p>The maximum length of the returned string,
     *                            null === up to the end of the string.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p><b>FALSE</b> is returned by the
     *                      non-mbstring fallback if the length of <i>str</i> cannot be determined.
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // call mbstring directly: delegating to self::substr() with the very same
            // arguments would end up in this branch again and never terminate
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ("$length === 0" was already handled above, so only null is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        } else {
            $length = (int) $length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10296
10297
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // only cut the strings if a window was requested
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                // limit the second string to the length of the extracted window
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure the window in the same encoding that is used to cut both strings
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10350
10351
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>The maximum length after the specified offset to search
     *                            for the substring.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int the number of occurrences, or false if haystack or needle is empty
     *                   or the length of haystack cannot be determined
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an empty search window cannot contain anything
        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $isUtf8 = $encoding === 'UTF-8';

        // reduce the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $haystack = $isUtf8
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // vanilla php: count the regex matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10432
10433
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The offset where to start counting.</p>
     * @param int|null $length   [optional] <p>The maximum length after the specified offset to search
     *                           for the substring.</p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // without function overloading the native function is already byte based
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // with overloading the search window is cut out first
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // "mb_" is available if overload is used; "CP850" is passed as an 8-bit encoding
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
10510
10511
    /**
     * Returns the number of occurrences of $substring in the given string.
     * The comparison is case-sensitive unless $caseSensitive is set to false.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$caseSensitive) {
                // compare both sides in uppercase
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            // case-fold both sides (to uppercase) in the given encoding
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10557
10558
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not start with it
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10582
10583
    /**
     * Get part of a string process in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string,
     *                         null === up to the end of the string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters; the result of the underlying
     *                      "substr()" / "mb_substr()" call is returned unchanged.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no artificial upper bound for "up to the end": simply omit the length
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10614
10615
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to strip the suffix from.</p>
     * @param string $needle   <p>The suffix to look for.</p>
     *
     * @return string
     *                <p>$haystack without the trailing $needle, or $haystack unchanged
     *                if it does not end with $needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything except the last "length of needle" characters
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10639
10640
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to strip the prefix from.</p>
     * @param string $needle   <p>The prefix to look for.</p>
     *
     * @return string
     *                <p>$haystack without the leading $needle, or $haystack unchanged
     *                if it does not start with $needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // drop as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10664
10665
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.<br><br>
     *                                     NOTE: if $str is an array and $length is not given, a length of 0
     *                                     (pure insertion) is used for every element.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: a missing length means "insert only" (0) in the array case,
            // non-integer entries fall back to the number of input strings
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call per element; the closure forwards $encoding, which a plain
            // callable passed to "array_map()" would silently drop (always falling back to UTF-8)
            return \array_map(
                static function ($strItem, $replacementItem, $offsetItem, $lengthItem) use ($encoding) {
                    return self::substr_replace($strItem, $replacementItem, $offsetItem, $lengthItem, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single input string only uses the first replacement
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // normalize the offset into the range [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // normalize the length: negative counts from the end, missing / too large means "to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php: split both strings into characters and splice the arrays
        //

        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10815
10816
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack).
     *
     * @param string $haystack <p>The string to strip the suffix from.</p>
     * @param string $needle   <p>The suffix to look for.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>$haystack without the trailing $needle, or $haystack unchanged
     *                if it does not end with $needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise suffix comparison
        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if ($endsWithNeedle === false) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // fast path: call the "mb_" functions directly
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10857
10858
    /**
     * Returns a case swapped version of the string.
     *
     * Lower-case characters become upper-case and vice versa. The fast path XORs the
     * lower-cased, the upper-cased and the original string byte by byte: wherever the
     * original equals its lower-case form the result is the upper-case form (and the other
     * way around). That only works while all three strings have the same byte length, so for
     * "UTF-8" input whose case conversion changes the byte length, the string is swapped
     * character by character instead.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        $lower = \mb_strtolower($str);
        $upper = \mb_strtoupper($str);

        // fast path: the byte-wise XOR is only aligned if no conversion changed the byte length
        $byteLength = \strlen($str);
        if (\strlen($lower) === $byteLength && \strlen($upper) === $byteLength) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // slow path: a case mapping changed the byte length (e.g. a ligature expanding to
        // two letters), the XOR would truncate / misalign the result
        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // not valid UTF-8: keep the byte-wise behavior
            return (string) ($lower ^ $upper ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $charLower = \mb_strtolower($char);
            // already lower-case (or caseless) => upper-case it, otherwise lower-case it
            $swapped .= $char === $charLower ? \mb_strtoupper($char) : $charLower;
        }

        return $swapped;
    }
10885
10886
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function of an extension ("mb_strlen" for mbstring,
     * "iconv" for iconv) exists although the extension itself is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
10909
10910
    /**
     * Replaces every tab character in the string with $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that replace one tab.</p>
     *
     * @return string <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
10928
10929
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without "$tryToKeepStringLength" the conversion is done by
     * "mb_convert_case()"; otherwise the work is handed over to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the full implementation
        $needsSpecialHandling = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsSpecialHandling) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength,
                false
            );
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
10974
10975
    /**
     * Deprecated alias for "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr <p>Passed to "UTF8::to_ascii()" as its "$unknown" argument.</p>
     * @param bool   $strict    <p>Passed to "UTF8::to_ascii()" as its "$strict" argument.</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        return self::to_ascii($str, $subst_chr, $strict);
    }
10991
10992
    /**
     * Deprecated alias for "UTF8::to_iso8859()": the argument is passed through unchanged.
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        return self::to_iso8859($str);
    }
11006
11007
    /**
     * Deprecated alias for "UTF8::to_latin1()": the argument is passed through unchanged.
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_latin1()
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        return self::to_latin1($str);
    }
11021
11022
    /**
     * Deprecated alias for "UTF8::to_utf8()": the argument is passed through unchanged,
     * html-entity decoding stays at the default of "UTF8::to_utf8()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        return self::to_utf8($str);
    }
11036
11037
    /**
     * Convert a string into ASCII.
     *
     * The string is cleaned first; every remaining non-ASCII character is decoded to its
     * code point and looked up in a per-"bank" replacement table (bank = code point >> 8,
     * loaded on demand and cached for the lifetime of the request). Characters without a
     * table entry are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl (only if
     *                        the intl support flag is set) | WARNING: bad performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // replacement tables, indexed by bank
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean($str, true, true, true, false, true, true);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if ($strict === true && self::$SUPPORT['intl'] === true) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split into characters
        \preg_match_all('/.|[^\x00]$/us', $str, $matches);
        $chars = $matches[0];

        // NOTE(review): deliberately initialized once and not reset per character (as before);
        // presumably every multi-byte character produced by the split overwrites it - confirm
        $codePoint = null;

        /** @noinspection ForeachSourceInspection */
        foreach ($chars as &$char) {
            $byte0 = self::$ORD[$char[0]];

            // ASCII - next please
            if ($byte0 >= 0 && $byte0 <= 127) {
                continue;
            }

            $byte1 = self::$ORD[$char[1]];

            // decode the code point, depending on the sequence length announced by the first byte
            if ($byte0 >= 192 && $byte0 <= 223) {
                // 2 bytes
                $codePoint = ($byte0 - 192) * 64 + ($byte1 - 128);
            } elseif ($byte0 >= 224) {
                $byte2 = self::$ORD[$char[2]];

                if ($byte0 <= 239) {
                    // 3 bytes
                    $codePoint = ($byte0 - 224) * 4096 + ($byte1 - 128) * 64 + ($byte2 - 128);
                } else {
                    $byte3 = self::$ORD[$char[3]];

                    if ($byte0 <= 247) {
                        // 4 bytes
                        $codePoint = ($byte0 - 240) * 262144 + ($byte1 - 128) * 4096 + ($byte2 - 128) * 64 + ($byte3 - 128);
                    } else {
                        $byte4 = self::$ORD[$char[4]];

                        if ($byte0 <= 251) {
                            // 5 bytes
                            $codePoint = ($byte0 - 248) * 16777216 + ($byte1 - 128) * 262144 + ($byte2 - 128) * 4096 + ($byte3 - 128) * 64 + ($byte4 - 128);
                        } else {
                            $byte5 = self::$ORD[$char[5]];

                            if ($byte0 <= 253) {
                                // 6 bytes
                                $codePoint = ($byte0 - 252) * 1073741824 + ($byte1 - 128) * 16777216 + ($byte2 - 128) * 262144 + ($byte3 - 128) * 4096 + ($byte4 - 128) * 64 + ($byte5 - 128);
                            }
                        }
                    }
                }
            }

            // 0xFE / 0xFF never start a sequence, and without a code point there is nothing to look up
            if ($byte0 === 254 || $byte0 === 255 || $codePoint === null) {
                $char = $unknown;

                continue;
            }

            // load the replacement table of this bank once
            $bank = $codePoint >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $bankData = self::getDataIfExists(\sprintf('x%02x', $bank));
                $UTF8_TO_ASCII[$bank] = $bankData === false ? [] : $bankData;
            }

            // position inside the bank
            $newchar = $codePoint & 255;

            $char = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }
        unset($char);

        return \implode('', $chars);
    }
11198
11199
    /**
     * Converts a value into a boolean, based on its string representation.
     *
     * <ul>
     * <li>"true", "1", "on", "yes" (any letter case) => true</li>
     * <li>"false", "0", "off", "no" (any letter case) and the empty string => false</li>
     * <li>other numeric strings => true if the number is greater than zero</li>
     * <li>everything else => true if the string is not empty after "trim()"</li>
     * </ul>
     *
     * @param mixed $str <p>The value, it is cast to a string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keyword = \strtolower($value);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
11240
11241
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Everything except "a-z", "A-Z", "0-9", ".", "-", whitespace and the fallback char itself
     * is removed; runs of whitespace become one $fallback_char, repeated fallback chars are
     * collapsed, and the fallback char is trimmed from both ends of the result.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe
     *                                  characters are simply removed.</p>
     * @param string $fallback_char     <p>Replacement for whitespace. An empty string removes
     *                                  whitespace instead of replacing it.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\\.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
            '/[\\s]+/u',                                             // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // 3) remove double $fallback_char's
        //
        // Skipped for an empty fallback char: "/[]+/u" is not a valid pattern, it would make
        // "preg_replace()" fail and the whole result collapse to an empty string.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/u';
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11276
11277
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), their keys are kept.
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11301
11302
    /**
     * Alias for "UTF8::to_iso8859()": the argument is passed through unchanged.
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     */
    public static function to_latin1($str)
    {
        return self::to_iso8859($str);
    }
11315
11316
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX", including surrogate pairs) and, on request,
     * html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays are converted element by element, keys are kept
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $byteCount = \strlen($str);
        $buf = '';

        for ($pos = 0; $pos < $byteCount; ++$pos) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                if (($lead & "\xC0") === "\x80") {
                    // a continuation byte without a lead byte - needs conversion
                    $buf .= self::to_utf8_convert_helper($lead);
                } else {
                    // it doesn't need conversion
                    $buf .= $lead;
                }

                continue;
            }

            // should be converted to UTF8, if it's not UTF8 already:
            // how many continuation bytes does the lead byte announce?
            if ($lead <= "\xDF") {
                $trailCount = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $trailCount = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $trailCount = 3; // looks like 4 bytes UTF8
            } else {
                $trailCount = 0; // doesn't look like UTF8, but should be converted
            }

            // collect the sequence, as long as every following byte is a continuation byte
            $sequence = $lead;
            $looksLikeUtf8 = $trailCount > 0;
            for ($k = 1; $looksLikeUtf8 && $k <= $trailCount; ++$k) {
                // a missing byte (end of string) counts as "\x00" and therefore as invalid
                $trail = $pos + $k >= $byteCount ? "\x00" : $str[$pos + $k];

                if ($trail >= "\x80" && $trail <= "\xBF") {
                    $sequence .= $trail;
                } else {
                    $looksLikeUtf8 = false;
                }
            }

            if ($looksLikeUtf8) {
                // yeah, almost sure it's UTF8 already
                $buf .= $sequence;
                $pos += $trailCount;
            } else {
                // not valid UTF8 - convert only the lead byte, the following bytes are re-examined
                $buf .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // a single "\uXXXX" escape
                    $codePoint = (int) \hexdec($matches[3]);
                } else {
                    // a surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $highSurrogate = (int) \hexdec($matches[1]);
                    $lowSurrogate = (int) \hexdec($matches[2]);

                    $codePoint = ($highSurrogate << 10)
                                 + $lowSurrogate
                                 + 0x10000
                                 - (0xD800 << 10)
                                 - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($codePoint < 0x80) {
                    return (string) self::chr($codePoint);
                }

                if ($codePoint < 0xA0) {
                    // encode as a two byte sequence by hand
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($codePoint >> 6)) . (string) self::chr(0x80 | ($codePoint & 0x3F));
                }

                return self::decimal_to_chr($codePoint);
            },
            $buf
        );

        // "preg_replace_callback()" failed
        if ($buf === null) {
            return '';
        }

        // decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
11449
11450
    /**
     * Strips whitespace (or a custom set of characters) from both ends of a UTF-8 string.
     *
     * INFO: This is slower than the native "trim()". The native function would only be
     * safe for pure 7-bit (ASCII) input, and checking for that costs more time than it saves.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars [optional] <p>Characters to strip instead of whitespace;
     *                           null or an empty string means "strip whitespace".</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly instead of relying on truthiness: the string "0" is
        // falsy in PHP, so trim('0abc0', '0') would otherwise silently fall back
        // to whitespace trimming.
        if ($chars !== null && $chars !== '') {
            // Escape the characters so they are taken literally inside the character class.
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // No mbstring available: delegate to the class' own regex helper, using "/" as delimiter.
        return self::regex_replace($str, $pattern, '', '', '/');
    }
11483
11484
    /**
     * Upper-cases the first character of a string and leaves the rest untouched.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Charset passed to the "mb_" functions.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string first.</p>
     * @param string|null $lang                  [optional] <p>Language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the string with its first character upper-cased
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // The plain mb_strtoupper() is sufficient unless a language-specific
        // rule or the "keep length" mode was requested.
        $plainUppercase = $lang === null && $tryToKeepStringLength === false;

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);

            if ($plainUppercase === true) {
                $head = \mb_strtoupper(
                    (string) \mb_substr($str, 0, 1, $encoding),
                    $encoding
                );
            } else {
                $head = self::strtoupper(
                    (string) self::substr($str, 0, 1, $encoding),
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            return $head . $tail;
        }

        // UTF-8 fast path: call the mb_* functions without an explicit encoding.
        $tail = (string) \mb_substr($str, 1);
        $firstChar = (string) \mb_substr($str, 0, 1);

        if ($plainUppercase === true) {
            $head = \mb_strtoupper($firstChar);
        } else {
            $head = self::strtoupper($firstChar, $encoding, false, $lang, $tryToKeepStringLength);
        }

        return $head . $tail;
    }
11553
11554
    /**
     * Alias for "UTF8::ucfirst()": upper-cases only the first character of the string.
     *
     * Only $str, $encoding and $cleanUtf8 are forwarded, so the remaining
     * "UTF8::ucfirst()" parameters keep their defaults.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Charset passed on to "UTF8::ucfirst()".</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $firstCharUppercased = self::ucfirst($str, $encoding, $cleanUtf8);

        return $firstCharUppercased;
    }
11569
11570
    /**
     * Upper-cases the first character of every word in the string.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that must be left untouched.</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // Strict comparison: a truthiness check would also swallow the valid input "0".
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // The native ucwords() is only an option when neither a charlist nor any
        // non-empty exception was given. Compared against '' on purpose, so that a
        // charlist of "0" is not mistaken for "no charlist".
        $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // Break the reference so later writes to $word cannot touch the last array element.
        unset($word);

        // The pieces are glued back together without a separator.
        return \implode('', $words);
    }
11631
11632
    /**
     * Decodes url-encoded input and html entities (repeatedly, if requested) and
     * fixes urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Only strings containing one of these markers can hold anything to decode.
        $mayBeEncoded = \strpos($str, '&') !== false
                        ||
                        \strpos($str, '%') !== false
                        ||
                        \strpos($str, '+') !== false
                        ||
                        \strpos($str, '\u') !== false;

        if ($mayBeEncoded === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // Decode at least once; with $multi_decode keep going until a pass changes nothing.
        do {
            $previousPass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entitiesDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\urldecode($entitiesDecoded));
        } while ($multi_decode === true && $previousPass !== $str);

        return $str;
    }
11689
11690
    /**
11691
     * Returns a lookup table that maps url-encoded ("%XX") Windows-1252 bytes to UTF-8 strings.
     *
     * The table covers "%20" through "%FF"; the keys use upper-case hex digits.
11692
     *
11693
     * @return string[] map of "%XX" escape sequence => replacement string
11694
     *
11695
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
11696
     */
11697
    public static function urldecode_fix_win1252_chars(): array
11698
    {
11699
        return [
11700 2
            '%20' => ' ',
11701
            '%21' => '!',
11702
            '%22' => '"',
11703
            '%23' => '#',
11704
            '%24' => '$',
11705
            '%25' => '%',
11706
            '%26' => '&',
11707
            '%27' => "'",
11708
            '%28' => '(',
11709
            '%29' => ')',
11710
            '%2A' => '*',
11711
            '%2B' => '+',
11712
            '%2C' => ',',
11713
            '%2D' => '-',
11714
            '%2E' => '.',
11715
            '%2F' => '/',
11716
            '%30' => '0',
11717
            '%31' => '1',
11718
            '%32' => '2',
11719
            '%33' => '3',
11720
            '%34' => '4',
11721
            '%35' => '5',
11722
            '%36' => '6',
11723
            '%37' => '7',
11724
            '%38' => '8',
11725
            '%39' => '9',
11726
            '%3A' => ':',
11727
            '%3B' => ';',
11728
            '%3C' => '<',
11729
            '%3D' => '=',
11730
            '%3E' => '>',
11731
            '%3F' => '?',
11732
            '%40' => '@',
11733
            '%41' => 'A',
11734
            '%42' => 'B',
11735
            '%43' => 'C',
11736
            '%44' => 'D',
11737
            '%45' => 'E',
11738
            '%46' => 'F',
11739
            '%47' => 'G',
11740
            '%48' => 'H',
11741
            '%49' => 'I',
11742
            '%4A' => 'J',
11743
            '%4B' => 'K',
11744
            '%4C' => 'L',
11745
            '%4D' => 'M',
11746
            '%4E' => 'N',
11747
            '%4F' => 'O',
11748
            '%50' => 'P',
11749
            '%51' => 'Q',
11750
            '%52' => 'R',
11751
            '%53' => 'S',
11752
            '%54' => 'T',
11753
            '%55' => 'U',
11754
            '%56' => 'V',
11755
            '%57' => 'W',
11756
            '%58' => 'X',
11757
            '%59' => 'Y',
11758
            '%5A' => 'Z',
11759
            '%5B' => '[',
11760
            '%5C' => '\\',
11761
            '%5D' => ']',
11762
            '%5E' => '^',
11763
            '%5F' => '_',
11764
            '%60' => '`',
11765
            '%61' => 'a',
11766
            '%62' => 'b',
11767
            '%63' => 'c',
11768
            '%64' => 'd',
11769
            '%65' => 'e',
11770
            '%66' => 'f',
11771
            '%67' => 'g',
11772
            '%68' => 'h',
11773
            '%69' => 'i',
11774
            '%6A' => 'j',
11775
            '%6B' => 'k',
11776
            '%6C' => 'l',
11777
            '%6D' => 'm',
11778
            '%6E' => 'n',
11779
            '%6F' => 'o',
11780
            '%70' => 'p',
11781
            '%71' => 'q',
11782
            '%72' => 'r',
11783
            '%73' => 's',
11784
            '%74' => 't',
11785
            '%75' => 'u',
11786
            '%76' => 'v',
11787
            '%77' => 'w',
11788
            '%78' => 'x',
11789
            '%79' => 'y',
11790
            '%7A' => 'z',
11791
            '%7B' => '{',
11792
            '%7C' => '|',
11793
            '%7D' => '}',
11794
            '%7E' => '~',
11795
            '%7F' => '',
11796
            // NOTE(review): byte 0x80 is the euro sign in Windows-1252, but this entry maps
            // to a backtick (same value as '%60') - confirm whether that is intentional.
            '%80' => '`',
11797
            '%81' => '',
11798
            '%82' => '‚',
11799
            '%83' => 'ƒ',
11800
            '%84' => '„',
11801
            '%85' => '…',
11802
            '%86' => '†',
11803
            '%87' => '‡',
11804
            '%88' => 'ˆ',
11805
            '%89' => '‰',
11806
            '%8A' => 'Š',
11807
            '%8B' => '‹',
11808
            '%8C' => 'Œ',
11809
            '%8D' => '',
11810
            '%8E' => 'Ž',
11811
            '%8F' => '',
11812
            '%90' => '',
11813
            '%91' => '‘',
11814
            '%92' => '’',
11815
            '%93' => '“',
11816
            '%94' => '”',
11817
            '%95' => '•',
11818
            '%96' => '–',
11819
            '%97' => '—',
11820
            '%98' => '˜',
11821
            '%99' => '™',
11822
            '%9A' => 'š',
11823
            '%9B' => '›',
11824
            '%9C' => 'œ',
11825
            '%9D' => '',
11826
            '%9E' => 'ž',
11827
            '%9F' => 'Ÿ',
11828
            '%A0' => '',
11829
            '%A1' => '¡',
11830
            '%A2' => '¢',
11831
            '%A3' => '£',
11832
            '%A4' => '¤',
11833
            '%A5' => '¥',
11834
            '%A6' => '¦',
11835
            '%A7' => '§',
11836
            '%A8' => '¨',
11837
            '%A9' => '©',
11838
            '%AA' => 'ª',
11839
            '%AB' => '«',
11840
            '%AC' => '¬',
11841
            '%AD' => '',
11842
            '%AE' => '®',
11843
            '%AF' => '¯',
11844
            '%B0' => '°',
11845
            '%B1' => '±',
11846
            '%B2' => '²',
11847
            '%B3' => '³',
11848
            '%B4' => '´',
11849
            '%B5' => 'µ',
11850
            '%B6' => '¶',
11851
            '%B7' => '·',
11852
            '%B8' => '¸',
11853
            '%B9' => '¹',
11854
            '%BA' => 'º',
11855
            '%BB' => '»',
11856
            '%BC' => '¼',
11857
            '%BD' => '½',
11858
            '%BE' => '¾',
11859
            '%BF' => '¿',
11860
            '%C0' => 'À',
11861
            '%C1' => 'Á',
11862
            '%C2' => 'Â',
11863
            '%C3' => 'Ã',
11864
            '%C4' => 'Ä',
11865
            '%C5' => 'Å',
11866
            '%C6' => 'Æ',
11867
            '%C7' => 'Ç',
11868
            '%C8' => 'È',
11869
            '%C9' => 'É',
11870
            '%CA' => 'Ê',
11871
            '%CB' => 'Ë',
11872
            '%CC' => 'Ì',
11873
            '%CD' => 'Í',
11874
            '%CE' => 'Î',
11875
            '%CF' => 'Ï',
11876
            '%D0' => 'Ð',
11877
            '%D1' => 'Ñ',
11878
            '%D2' => 'Ò',
11879
            '%D3' => 'Ó',
11880
            '%D4' => 'Ô',
11881
            '%D5' => 'Õ',
11882
            '%D6' => 'Ö',
11883
            '%D7' => '×',
11884
            '%D8' => 'Ø',
11885
            '%D9' => 'Ù',
11886
            '%DA' => 'Ú',
11887
            '%DB' => 'Û',
11888
            '%DC' => 'Ü',
11889
            '%DD' => 'Ý',
11890
            '%DE' => 'Þ',
11891
            '%DF' => 'ß',
11892
            '%E0' => 'à',
11893
            '%E1' => 'á',
11894
            '%E2' => 'â',
11895
            '%E3' => 'ã',
11896
            '%E4' => 'ä',
11897
            '%E5' => 'å',
11898
            '%E6' => 'æ',
11899
            '%E7' => 'ç',
11900
            '%E8' => 'è',
11901
            '%E9' => 'é',
11902
            '%EA' => 'ê',
11903
            '%EB' => 'ë',
11904
            '%EC' => 'ì',
11905
            '%ED' => 'í',
11906
            '%EE' => 'î',
11907
            '%EF' => 'ï',
11908
            '%F0' => 'ð',
11909
            '%F1' => 'ñ',
11910
            '%F2' => 'ò',
11911
            '%F3' => 'ó',
11912
            '%F4' => 'ô',
11913
            '%F5' => 'õ',
11914
            '%F6' => 'ö',
11915
            '%F7' => '÷',
11916
            '%F8' => 'ø',
11917
            '%F9' => 'ù',
11918
            '%FA' => 'ú',
11919
            '%FB' => 'û',
11920
            '%FC' => 'ü',
11921
            '%FD' => 'ý',
11922
            '%FE' => 'þ',
11923
            '%FF' => 'ÿ',
11924
        ];
11925
    }
11926
11927
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences whose code point is below 256 become the matching single
     * byte; every other multi-byte sequence is replaced by "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars [optional] <p>If true, the original string is returned unchanged
     *                              whenever the decoded result is not shorter (measured with
     *                              "UTF8::strlen()") than the input.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // Lazily load the lookup tables (presumably byte => int and int => byte - see getData()).
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // The string is rewritten in place: $i is the read position, $j the write
        // position, and $j never overtakes $i.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // The high nibble of the lead byte tells how long the sequence is.
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence
                    if ($i + 1 >= $len) {
                        // Truncated sequence at the end of the input: there is no
                        // continuation byte to read, so emit the placeholder instead
                        // of accessing an uninitialized string offset.
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one extra byte, then share the 3-byte handling
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte sequence: cannot be represented in ISO-8859-1
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    // single byte: copy as is
                    $str[$j] = $str[$i];
            }
        }

        // Cut off the leftover bytes behind the write position.
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
11996
11997
    /**
     * Encodes an ISO-8859-1 string to UTF-8 via the global "utf8_encode()" function.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the UTF-8 string, or an empty string if the input is empty or the conversion fails
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill may return false
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12021
12022
    /**
     * Fixes utf8-win1252 chars; kept only as a wrapper around "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12035
12036
    /**
     * Returns the class' whitespace table (the static "$WHITESPACE_TABLE" property) as is.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12049
12050
    /**
     * Cuts a string after a maximum number of words.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The maximum number of words to keep.</p>
     * @param string $strAddOn <p>Suffix appended when the string was shortened.</p>
     *
     * @return string the input itself if it has no more than $limit words, otherwise the
     *                first $limit words (right-trimmed) followed by $strAddOn; an empty
     *                string for empty input or a limit below 1
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // Leading whitespace, then up to $limit runs of "non-whitespace + trailing whitespace".
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // No match at all: hand the input back untouched.
        if (!isset($found[0])) {
            return $str;
        }

        // The match already spans the whole string, so nothing has to be cut off.
        if ((int) \mb_strlen($found[0]) === \mb_strlen($str)) {
            return $str;
        }

        return \rtrim($found[0]) . $strAddOn;
    }
12077
12078
    /**
     * Wraps a string to a given number of characters.
     *
     * The input is reduced to a one-byte-per-character mask ("?" for a character,
     * " " for a space, "#" for an already existing break), the native "wordwrap()"
     * computes the break positions on that mask, and the positions are then mapped
     * back onto the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string
     *                if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // Build the character list and the matching mask side by side.
        $chars = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                // every segment after the first one was preceded by a break
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $wrapped = '';
        $charIndex = 0;
        $breakPos = -1;
        $maskIndex = -1;
        $mask = \wordwrap($mask, $width, '#', $cut);

        $maskLength = \mb_strlen($mask);
        while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
            // Move every character in front of this break into the result.
            for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex]);
                ++$charIndex;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($maskIndex > $maskLength) {
                    break 2;
                }
            }

            // Drop the original break / space that the new break takes the place of.
            if (
                $break === $chars[$charIndex]
                ||
                $chars[$charIndex] === ' '
            ) {
                unset($chars[$charIndex]);
                ++$charIndex;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($breakPos > $maskLength) {
                break;
            }
        }

        // Whatever was not consumed forms the last line.
        return $wrapped . \implode('', $chars);
    }
12164
12165
    /**
     * Splits the string by "$delimiter" (newlines by default), word-wraps every
     * piece on its own and joins the pieces again.
     *
     * @param string      $str           <p>The input string.</p>
     * @param int         $width         [optional] <p>The column width.</p>
     * @param string      $break         [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut           [optional] <p>
     *                                   If the cut is set to true, the string is
     *                                   always wrapped at or before the specified width. So if you have
     *                                   a word that is larger than the given width, it is broken apart.
     *                                   </p>
     * @param bool        $addFinalBreak [optional] <p>
     *                                   If this flag is true, then the method will add a $break at the end
     *                                   of the result string.
     *                                   </p>
     * @param string|null $delimiter     [optional] <p>
     *                                   You can change the default behavior, where we split the string by newline.
     *                                   An empty delimiter cannot split anything and yields no lines at all.
     *                                   </p>
     *
     * @return string the wrapped pieces joined by $delimiter ("\n" if it is null), plus the final break if requested
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $addFinalBreak = true,
        string $delimiter = null
    ): string {
        if ($delimiter === null) {
            $strings = \preg_split('/\\r\\n|\\r|\\n/', $str);
        } elseif ($delimiter === '') {
            // explode() rejects an empty separator: it returned false (with a warning)
            // on PHP 7 and throws a ValueError on PHP 8. Keep the PHP 7 outcome
            // ("no lines") without triggering either.
            $strings = false;
        } else {
            $strings = \explode($delimiter, $str);
        }

        $stringArray = [];
        if ($strings !== false) {
            foreach ($strings as $value) {
                $stringArray[] = self::wordwrap($value, $width, $break, $cut);
            }
        }

        $finalBreak = $addFinalBreak ? $break : '';

        return \implode($delimiter ?? "\n", $stringArray) . $finalBreak;
    }
12216
12217
    /**
     * Returns the class' list of Unicode White Space characters (the static "$WHITESPACE" property) as is.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12226
12227
    /**
12228
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
12229
     *
12230
     * @see http://hsivonen.iki.fi/php-utf8/
12231
     *
12232
     * @param string $str    <p>The string to be checked.</p>
12233
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
12234
     *
12235
     * @return bool
12236
     */
12237
    private static function is_utf8_string(string $str, bool $strict = false): bool
12238
    {
12239 108
        if ($str === '') {
12240 14
            return true;
12241
        }
12242
12243 102
        if ($strict === true) {
12244 2
            $isBinary = self::is_binary($str, true);
12245
12246 2
            if ($isBinary && self::is_utf16($str, false) !== false) {
12247 2
                return false;
12248
            }
12249
12250
            if ($isBinary && self::is_utf32($str, false) !== false) {
12251
                return false;
12252
            }
12253
        }
12254
12255 102
        if (self::pcre_utf8_support() !== true) {
12256
            // If even just the first character can be matched, when the /u
12257
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
12258
            // invalid, nothing at all will match, even if the string contains
12259
            // some valid sequences
12260
            return \preg_match('/^.{1}/us', $str, $ar) === 1;
12261
        }
12262
12263 102
        $mState = 0; // cached expected number of octets after the current octet
12264
        // until the beginning of the next UTF8 character sequence
12265 102
        $mUcs4 = 0; // cached Unicode character
12266 102
        $mBytes = 1; // cached expected number of octets in the current sequence
12267
12268 102
        if (self::$ORD === null) {
12269
            self::$ORD = self::getData('ord');
12270
        }
12271
12272 102
        $len = \strlen((string) $str);
12273
        /** @noinspection ForeachInvariantsInspection */
12274 102
        for ($i = 0; $i < $len; ++$i) {
12275 102
            $in = self::$ORD[$str[$i]];
12276
12277 102
            if ($mState === 0) {
12278
                // When mState is zero we expect either a US-ASCII character or a
12279
                // multi-octet sequence.
12280 102
                if ((0x80 & $in) === 0) {
12281
                    // US-ASCII, pass straight through.
12282 97
                    $mBytes = 1;
12283 83
                } elseif ((0xE0 & $in) === 0xC0) {
12284
                    // First octet of 2 octet sequence.
12285 73
                    $mUcs4 = $in;
12286 73
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
12287 73
                    $mState = 1;
12288 73
                    $mBytes = 2;
12289 58
                } elseif ((0xF0 & $in) === 0xE0) {
12290
                    // First octet of 3 octet sequence.
12291 42
                    $mUcs4 = $in;
12292 42
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
12293 42
                    $mState = 2;
12294 42
                    $mBytes = 3;
12295 29
                } elseif ((0xF8 & $in) === 0xF0) {
12296
                    // First octet of 4 octet sequence.
12297 18
                    $mUcs4 = $in;
12298 18
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
12299 18
                    $mState = 3;
12300 18
                    $mBytes = 4;
12301 13
                } elseif ((0xFC & $in) === 0xF8) {
12302
                    /* First octet of 5 octet sequence.
12303
                     *
12304
                     * This is illegal because the encoded codepoint must be either
12305
                     * (a) not the shortest form or
12306
                     * (b) outside the Unicode range of 0-0x10FFFF.
12307
                     * Rather than trying to resynchronize, we will carry on until the end
12308
                     * of the sequence and let the later error handling code catch it.
12309
                     */
12310 5
                    $mUcs4 = $in;
12311 5
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
12312 5
                    $mState = 4;
12313 5
                    $mBytes = 5;
12314 10
                } elseif ((0xFE & $in) === 0xFC) {
12315
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
12316 5
                    $mUcs4 = $in;
12317 5
                    $mUcs4 = ($mUcs4 & 1) << 30;
12318 5
                    $mState = 5;
12319 5
                    $mBytes = 6;
12320
                } else {
12321
                    // Current octet is neither in the US-ASCII range nor a legal first
12322
                    // octet of a multi-octet sequence.
12323 102
                    return false;
12324
                }
12325 83
            } elseif ((0xC0 & $in) === 0x80) {
12326
12327
                // When mState is non-zero, we expect a continuation of the multi-octet
12328
                // sequence
12329
12330
                // Legal continuation.
12331 75
                $shift = ($mState - 1) * 6;
12332 75
                $tmp = $in;
12333 75
                $tmp = ($tmp & 0x0000003F) << $shift;
12334 75
                $mUcs4 |= $tmp;
12335
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
12336
                // Unicode code point to be output.
12337 75
                if (--$mState === 0) {
12338
                    // Check for illegal sequences and code points.
12339
                    //
12340
                    // From Unicode 3.1, non-shortest form is illegal
12341
                    if (
12342 75
                        ($mBytes === 2 && $mUcs4 < 0x0080)
12343
                        ||
12344 75
                        ($mBytes === 3 && $mUcs4 < 0x0800)
12345
                        ||
12346 75
                        ($mBytes === 4 && $mUcs4 < 0x10000)
12347
                        ||
12348 75
                        ($mBytes > 4)
12349
                        ||
12350
                        // From Unicode 3.2, surrogate characters are illegal.
12351 75
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
12352
                        ||
12353
                        // Code points outside the Unicode range are illegal.
12354 75
                        ($mUcs4 > 0x10FFFF)
12355
                    ) {
12356 8
                        return false;
12357
                    }
12358
                    // initialize UTF8 cache
12359 75
                    $mState = 0;
12360 75
                    $mUcs4 = 0;
12361 75
                    $mBytes = 1;
12362
                }
12363
            } else {
12364
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
12365
                // Incomplete multi-octet sequence.
12366 35
                return false;
12367
            }
12368
        }
12369
12370 67
        return true;
12371
    }
12372
12373
    /**
     * Swap case variants in a string using the class' case-folding replacement tables.
     *
     * The "common" table (self::$COMMON_CASE_FOLD) is always applied; the "full" table
     * (loaded lazily from the "caseFolding_full" data file) is applied on top when requested.
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $useLower     <p>When exactly true, replace the 'upper' entries with the 'lower' entries;
     *                             otherwise replace 'lower' entries with 'upper' entries (the default).</p>
     * @param bool   $fullCaseFold <p>When truthy, additionally apply the full case-folding table.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        // Only a strict boolean true selects the lowercase direction.
        $toLower = $useLower === true;

        $commonUpper = self::$COMMON_CASE_FOLD['upper'];
        $commonLower = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? \str_replace($commonUpper, $commonLower, $str)
            : \str_replace($commonLower, $commonUpper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // The full table is loaded once and kept for the lifetime of the process.
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Index 0 is replaced by index 1 when lowering, and the other way round otherwise.
        return $toLower
            ? \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str)
            : \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12414
12415
    /**
     * Load a data table from "/data/<file>.php" (relative to this source file).
     *
     * The data file is included and whatever it returns is handed back to the caller.
     *
     * @param string $file <p>Base name of the data file, without directory and without ".php".</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12429
12430
    /**
     * Load a data table from "/data/<file>.php" (relative to this source file), if that file exists.
     *
     * @param string $file <p>Base name of the data file, without directory and without ".php".</p>
     *
     * @return false|mixed <p>The value returned by the included file, or false when the file does not exist.</p>
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12448
12449
    /**
     * Build the emoji lookup caches on first use.
     *
     * Loads the "emoji" data table if needed, orders it so that the longest keys (by byte length)
     * come first, and then fills the key cache, the value cache and the list of reversible
     * placeholder tokens (one per key, derived from the key's CRC32 checksum).
     *
     * @return true|null <p>true when the caches were built by this call, null when they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            // Caches are already in place, nothing to do.
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // Longest keys first.
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = (string) \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12479
12480
    /**
     * Checks whether mbstring function "overloading" of the string functions is active on the server.
     *
     * INFO: the INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     *
     * @return bool <p>true when the MB_OVERLOAD_STRING bit is set in 'mbstring.func_overload',
     *              false otherwise (including when the constant is not defined at all).</p>
     */
    private static function mbstring_overloaded(): bool
    {
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return ($overloadFlags & \MB_OVERLOAD_STRING) !== 0;
    }
12497
12498
    /**
     * Filter a list of strings, dropping values that are too short and / or blank.
     *
     * The surviving values are returned as a freshly indexed list, in their original order.
     *
     * @param array    $strings           <p>The strings to filter.</p>
     * @param bool     $removeEmptyValues <p>When true, drop values that are empty after trim().</p>
     * @param int|null $removeShortValues <p>When not null, drop values whose mb_strlen() is
     *                                    less than or equal to this number.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $filterShort = $removeShortValues !== null;
        $kept = [];

        foreach ($strings as $candidate) {
            if ($filterShort && \mb_strlen($candidate) <= $removeShortValues) {
                continue;
            }

            if ($removeEmptyValues === true && \trim($candidate) === '') {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
12532
12533
    /**
     * Build a regular-expression fragment that matches any one of the given characters.
     *
     * The pieces returned by self::str_split($s) are folded into a bracket expression
     * that starts out as $class:
     * - a "-" is moved to the front of the bracket expression,
     * - a piece shorter than three bytes is appended after preg_quote() (delimiter "/"),
     * - a longer piece that self::strlen() reports as a single character is appended as-is,
     * - anything else becomes its own alternative next to the bracket expression.
     *
     * Results are memoized per ($class, $s) pair.
     *
     * @param string $s     <p>The characters that should be matched.</p>
     * @param string $class <p>Initial content of the bracket expression (inserted as-is, not quoted).</p>
     *
     * @return string <p>"[...]" when everything fits into one bracket expression,
     *                otherwise "(?:[...]|alt|...)"; an empty string when both inputs are empty.</p>
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        // Keyed by both arguments separately: a concatenated key would make e.g.
        // ('a-z', '') and ('', 'a-z') share one cache entry although they produce
        // different patterns ("[-az]" vs. "[a-z]").
        if (isset($RX_CLASS_CACHE[$class][$s])) {
            return $RX_CLASS_CACHE[$class][$s];
        }

        $classArray = [$class];

        // Iterate by value with a dedicated variable; the pieces are only read,
        // and the parameter $s must stay intact for the cache write below.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // A leading "-" is always literal inside a bracket expression.
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // Fewer than three bytes.
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $classArray[0] .= $char;
            } else {
                $classArray[] = $char;
            }
        }

        // Explicit comparison: a plain truthiness check would skip the class "0".
        if ($classArray[0] !== '') {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$class][$s] = $return;

        return $return;
    }
12581
12582
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits the input at the delimiter and upper-cases the first character of every part via
     * self::ucfirst(), except for parts that
     * - are exactly one of the known lowercase name particles (e.g. "von", "de"), or
     * - start with one of the known prefixes (e.g. "mc", "d'"), or
     * - start with one of the name particles while the delimiter is "-".
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the parts of the name.</p>
     * @param string $encoding  <p>Encoding passed on to self::strpos().</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        $parts = \explode($delimiter, $names);
        if ($parts === false) {
            return '';
        }

        // Parts that stay untouched when they match exactly.
        $particles = [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ];

        // Parts that stay untouched when they start with one of these.
        $prefixes = [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ];

        // With "-" as delimiter the particles are treated as prefixes as well.
        $beginnings = $delimiter === '-' ? \array_merge($particles, $prefixes) : $prefixes;

        foreach ($parts as $index => $part) {
            if (\in_array($part, $particles, true)) {
                continue;
            }

            $startsWithSpecialCase = false;
            foreach ($beginnings as $beginning) {
                if (self::strpos($part, $beginning, 0, $encoding) === 0) {
                    $startsWithSpecialCase = true;
                }
            }

            if (!$startsWithSpecialCase) {
                $parts[$index] = self::ucfirst($part);
            }
        }

        return \implode($delimiter, $parts);
    }
12672
12673
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and every run of
     * non-spacing marks (\p{Mn}) is removed afterwards, e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>The folded string, or null when the input could not be normalized
     *                     or the replacement failed (e.g. invalid UTF-8).</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // Normalizer::normalize() reports failure with false; handing that to preg_replace()
        // would raise a TypeError under strict_types, so map it to the documented null result.
        if (!\is_string($decomposed)) {
            return null;
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12688
12689
    /**
     * Convert a single input unit to its UTF-8 representation.
     *
     * The ordinal of the input is looked up in self::$ORD. If that ordinal has an entry in the
     * Windows-1252 special-case table, the mapped value is returned. Otherwise a two-byte
     * sequence is assembled: a lead byte (high bits of the ordinal OR-ed with 0xC0) followed
     * by a continuation byte (low six bits of the input OR-ed with 0x80).
     *
     * @param int|string $input <p>Must be a valid key of the self::$ORD table
     *                          (presumably a single byte; verify against the callers).</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // Lazily load the lookup tables on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return '' . self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Truncate explicitly: "/" yields a float for most ordinals, and using a float with a
        // fractional part as array key is deprecated since PHP 8.1.
        $leadIndex = (int) ($ordC1 / 64);

        // The "|" and "&" below are intentional: applied to two strings they work on the
        // raw bytes, they are not meant to be the logical operators "||" / "&&".
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[$leadIndex] | "\xC0";
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
12723
12724
    /**
     * Rewrite non-standard "%uXXXX" escapes into hexadecimal HTML entities ("&#xXXXX;").
     *
     * Three or four hex digits after "%u" are accepted; strings without such an escape
     * are returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $unicodeEscape = '/%u([0-9a-fA-F]{3,4})/';

        if (!\preg_match($unicodeEscape, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicodeEscape, '&#x\\1;', $str);
    }
12738
}
12739