Passed
Push — master ( 98cca6...4a6bd9 )
by Lars
07:45 queued 30s
created

UTF8::str_upper_first()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 8
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 5
dl 0
loc 8
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Bom => Byte-Length
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * Every BOM is listed as raw bytes and additionally in its 'as "WINDOWS-1252"'
     * form, whose byte length is larger because one character may need more than one byte.
     *
     * @var array
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // written with escapes so the key cannot be lost or re-encoded by editors / tools
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        // HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $ENCODINGS;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ORD;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $EMOJI;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI_VALUES_CACHE;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_KEYS_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $CHR;
234
235
    /**
     * Intentionally empty constructor.
     */
    public function __construct()
    {
        // nothing to initialise
    }
241
242
    /**
     * Fetch the single character found at a given position, similar to $str[1]
     * but counting characters instead of bytes.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character, or an empty string for an empty
     *                input or a negative position
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing can be looked up in an empty string or at a negative position
        if ($pos < 0 || $str === '') {
            return '';
        }

        // plain UTF-8 goes straight to mbstring, everything else through the class helper
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
263
264
    /**
     * Returns the string with a leading UTF-8 BOM.
     *
     * INFO: A string that already has a BOM is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            // already marked, do not add a second BOM
            return $str;
        }

        return self::bom() . $str;
    }
281
282
    /**
     * Changes the case of all keys in an array.
     *
     * INFO: Keys that become identical after the case change overwrite each other;
     *       the value of the last such key wins.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default)</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array the input values, unchanged, under lower or uppercased keys
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // every value other than CASE_UPPER (including invalid ones) means "lower"
        $toUpper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: no reference is needed and none is left dangling afterwards
        foreach ($array as $key => $value) {
            $key = $toUpper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
313
314
    /**
     * Extracts the text enclosed by the first $start delimiter (searched from
     * $offset) and the next $end delimiter after it. An empty string is returned
     * when either delimiter is missing or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // fast path: plain mbstring calls
            $startPos = \mb_strpos($str, $start, $offset);
            if ($startPos === false) {
                return '';
            }

            // first character after the start delimiter
            $contentPos = $startPos + (int) \mb_strlen($start);
            $endPos = \mb_strpos($str, $end, $contentPos);
            if ($endPos !== false && $endPos !== $contentPos) {
                return (string) \mb_substr($str, $contentPos, $endPos - $contentPos);
            }

            return '';
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        $contentPos = $startPos + (int) self::strlen($start, $encoding);
        $endPos = self::strpos($str, $end, $contentPos, $encoding);
        if ($endPos !== false && $endPos !== $contentPos) {
            return (string) self::substr($str, $contentPos, $endPos - $contentPos, $encoding);
        }

        return '';
    }
377
378
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
     *
     * The digits are read as one big-endian number: leading zero bits are
     * ignored and the remaining bits are right-aligned to whole bytes. The
     * conversion works byte by byte, so bit strings of any length are converted
     * without loss of precision and an odd number of hex nibbles can no longer
     * shift the result.
     *
     * @param mixed $bin <p>String consisting of "1" and "0"; any non-string or empty value yields ''.</p>
     *
     * @return string the decoded bytes, or an empty string if there is nothing to decode
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // a value of zero encodes nothing
        $bits = \ltrim($bin, '0');
        if ($bits === '') {
            return '';
        }

        // right-align the bits to a whole number of bytes
        $bitCount = \strlen($bits);
        $remainder = $bitCount % 8;
        if ($remainder !== 0) {
            $bits = \str_pad($bits, $bitCount + 8 - $remainder, '0', \STR_PAD_LEFT);
        }

        $str = '';
        foreach (\str_split($bits, 8) as $byteBits) {
            $str .= \chr((int) \bindec($byteBits));
        }

        return $str;
    }
398
399
    /**
     * Provides the three-byte UTF-8 Byte Order Mark.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
410
411
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
425
426
    /**
     * Picks the character located at $index (zero-based).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // plain UTF-8 goes straight to mbstring, everything else through the class helper
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
443
444
    /**
     * Breaks the string up into its characters (delegates to UTF8::str_split()).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
455
456
    /**
     * Detects which UTF-8 related extensions / features are available and stores
     * the result in self::$SUPPORT. The detection runs only once; later calls
     * return immediately.
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            // detection already done
            return;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch mbstring (and its regex functions) to UTF-8
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();
        self::$SUPPORT['intl__transliterator_list_ids'] = [];

        $canListTransliterators = self::$SUPPORT['intl'] === true
                                  &&
                                  \function_exists('transliterator_list_ids') === true;
        if ($canListTransliterators) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }
    }
515
516
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * The code point is normalised to an integer first. Values that are not
     * numeric, negative or above U+10FFFF yield null instead of indexing past
     * the lookup table or producing invalid byte sequences. Results are
     * memoised per code point and encoding.
     *
     * NOTE(review): reads self::$SUPPORT without calling checkForSupport();
     *               presumably it is populated before this is called - confirm.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        // normalise to int once, so that every code path below sees the same value
        if (\is_int($code_point) === false) {
            if (\is_numeric($code_point) === false) {
                return null;
            }

            $code_point = (int) $code_point;
        }

        // outside of the Unicode code space: nothing can be generated
        if ($code_point < 0 || $code_point > 0x10FFFF) {
            return null;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            // IntlChar signals failure with null: report it instead of re-encoding it
            if ($chr === null) {
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = (array) self::getData('chr');
        }

        // $code_point is > 0x7F here, so only the multi-byte forms remain
        if ($code_point <= 0x7FF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
628
629
    /**
     * Runs the callback once for every character of the string and collects the results.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
644
645
    /**
     * Lists, character by character, how many bytes each character of the string occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byteLength = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byteLength = '\strlen';
        }

        return \array_map($byteLength, self::str_split($str));
    }
675
676
    /**
     * Get a decimal code representation of a specific character.
     *
     * Only the first UTF-8 sequence of $char is decoded, straight from its bytes.
     * An empty string yields 0. A byte that is not a valid lead byte is returned
     * as its raw byte value, and continuation bytes missing from a truncated
     * sequence count as zero bits (no out-of-range string offsets are read).
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        if ($char === '') {
            return 0;
        }

        // single raw byte, so the native byte-wise ord() is exactly what is needed
        $code = \ord($char[0]);

        if (($code & 0x80) === 0) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        } else {
            // not a lead byte: hand back the raw byte value
            return $code;
        }

        for ($i = 1; $i < $bytes; ++$i) {
            // 10xxxxxx (a missing byte of a truncated sequence contributes nothing)
            $payload = isset($char[$i]) ? (\ord($char[$i]) & ~0x80) : 0;
            $code = ($code << 6) + $payload;
        }

        return $code;
    }
714
715
    /**
     * Formats the code point of a UTF-8 encoded character as prefixed hexadecimal (U+xxxx).
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx, or an empty string for an empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the literal "&#0;" is handled like an empty character
        $normalized = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($normalized), $pfix);
    }
735
736
    /**
     * Alias of UTF8::chr_to_decimal(): forwards the character unchanged.
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
749
750
    /**
     * Cuts a string into chunks of at most $chunklen characters and joins the chunks with $end.
     *
     * INFO: $end is placed between the chunks only, it is not appended after the last chunk.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) used to join the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
763
764
    /**
     * Strips every byte that is not part of a UTF-8 shaped sequence and then applies
     * the requested optional clean-up steps, in this order: diamond question mark,
     * invisible characters, whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 keeps runs of well-formed sequences, groups 2 and 3 match the stray bytes to drop
        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
831
832
    /**
     * Repairs simple UTF-8 encoding errors and then runs UTF8::clean() with a fixed
     * set of options, so that only printable UTF-8 chars remain.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        return self::clean(
            $repaired,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // do not normalize MS Word chars
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
866
867
    /**
     * Converts a string, or an array of strings, into the list of its Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        // a string is split into characters first, an array is used as given
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        $codes = \array_map([self::class, 'ord'], $chars);
        if (\count($codes) === 0) {
            return [];
        }

        if ($u_style !== true) {
            return $codes;
        }

        return \array_map([self::class, 'int_to_hex'], $codes);
    }
911
912
    /**
     * Trim the string and squeeze every run of whitespace into one single space.
     *
     * Whitespace is matched with the POSIX class [[:space:]]; the replacement is done by
     * mb_ereg_replace() when mbstring support is flagged, otherwise by regex_replace().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the trimmed string with condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $whitespaceRun = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace($whitespaceRun, ' ', $str);
        } else {
            $collapsed = self::regex_replace($str, $whitespaceRun, ' ');
        }

        return \trim($collapsed);
    }
930
931
    /**
     * Count how often each character occurs in a string.
     *
     * The string is split into single characters with str_split() and the pieces are tallied
     * with array_count_values().
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Forwarded unchanged to str_split() as its
     *                                   fourth argument.</p>
     *
     * @return int[] an associative array with the characters as keys and
     *               their number of occurrences as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($characters);
    }
955
956
    /**
     * Strip "@media ... { ... }" blocks (CSS media queries) from a string.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @return string the input without the matched media-query blocks; the result of
     *                preg_replace() is cast to string, so a failed replacement yields ''
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // flags: case-insensitive, dot matches newline, UTF-8, multiline, ungreedy
        $mediaQueryRx = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($mediaQueryRx, '', $str);

        return (string) $stripped;
    }
971
972
    /**
     * Tell whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
982
983
    /**
     * Turn a decimal code point into the matching UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then decoded
     * by html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numericEntity = '&#' . $int . ';';

        return self::html_entity_decode($numericEntity, \ENT_QUOTES | \ENT_HTML5);
    }
994
995
    /**
     * Decodes a MIME header field.
     *
     * With iconv support the work is delegated to iconv_mime_decode() (continuing on errors).
     * Otherwise mb_decode_mimeheader() is used and its result is converted to the requested
     * encoding afterwards.
     *
     * @param string $str      <p>The MIME-encoded header value.</p>
     * @param string $encoding [optional] <p>The charset of the decoded result.</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // mb_decode_mimeheader() hands back text in the mbstring internal encoding, so the
        // conversion to the requested charset has to happen on the decoded text. Converting
        // the still-encoded header beforehand would be ignored or would break the
        // "=?charset?X?...?=" words.
        $decoded = \mb_decode_mimeheader($str);

        if ($encoding !== 'UTF-8') {
            $decoded = self::encode($encoding, $decoded);
        }

        return $decoded;
    }
1021
1022
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  Unlike "UTF8::utf8_encode()" this function also tries to repair broken / double encoding,
     *        so it can be called on a string that is already UTF-8 without damaging it.
     *
     * Besides real charsets, the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled
     * for both the source and the target side.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the source encoding is detected from the
     *                                       string and a detected value replaces $fromEncoding; if nothing
     *                                       can be detected the string is converted via to_utf8().</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the target is 'JSON' and json_encode() fails
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        // nothing to convert
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // normalize the encoding names (the two most common names skip the lookup)
        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // source and target are already the same
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // --- pseudo encoding: JSON
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // --- pseudo encoding: BASE64
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // --- pseudo encoding: HTML-ENTITIES
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the source encoding when asked to, or when none is known
        $detectedEncoding = ($autodetectFromEncoding === true || !$fromEncoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $detectedEncoding, $str, "\n\n");

        if ($detectedEncoding !== false) {
            $fromEncoding = $detectedEncoding;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // dedicated converters for the common single-byte <-> UTF-8 cases
        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // warn (but keep going) if the target needs mbstring and it is not available
        if (
            !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            if ($converted) {
                return $converted;
            }
        }

        // last resort: iconv, and if that fails as well the input is handed back
        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        return $viaIconv !== false ? $viaIconv : $str;
    }
1180
1181
    /**
     * Encode a string as a MIME header value via iconv_mime_encode() (with an empty field name).
     *
     * @param string $str              <p>The value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
     * @param int    $indent           [optional] <p>Set the max line length (passed as "line-length").</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // normalize the charset names (the two most common names skip the lookup)
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1221
1222
    /**
     * Create an extract from a text; if a search string is given, the extract is built around its position.
     *
     * Cuts are made at the next space or full stop, and skipped text is marked with $replacerForSkippedText.
     * Note: the cut position is the minimum of the two lookups, so when either the space or the
     * full stop is not found the minimum evaluates to 0 and the text is not shortened at that point.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 goes straight to the native mb_* functions, everything else through the class wrappers
        $isUtf8 = $encoding === 'UTF-8';

        $lengthOf = static function ($text) use ($isUtf8, $encoding) {
            return (int) ($isUtf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };

        $slice = static function ($text, $start, $count) use ($isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($text, $start, $count)
                : self::substr($text, $start, $count, $encoding);
        };

        // position of the first space / full stop at or after $offset (0 if one of the lookups fails)
        $firstBreakFrom = static function ($offset) use ($str, $isUtf8, $encoding) {
            if ($isUtf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // --- no search string: keep the beginning of the text
        if ($search === '') {
            $end = $length > 0 ? \min($length - 1, $lengthOf($str)) : 0;

            $pos = $firstBreakFrom($end);
            if (!$pos) {
                return $str;
            }

            $strSub = $slice($str, 0, $pos);
            if ($strSub === false) {
                return '';
            }

            return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // --- with search string: locate the word and the start of the window around it
        $wordPos = (int) ($isUtf8 ? \mb_stripos($str, $search) : self::stripos($str, $search, 0, $encoding));
        $halfSide = (int) ($wordPos - $length / 2 + $lengthOf($search) / 2);

        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = $slice($str, 0, $halfSide);

            if ($halfText !== false) {
                // start at the last space / full stop before the window
                if ($isUtf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($wordPos && $halfSide > 0) {
            // the word sits far enough inside the text: text is skipped at the front
            $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));

            $pos_end = $firstBreakFrom($offset) - $pos_start;

            if (!$pos_end || $pos_end <= 0) {
                // no usable end position: take everything from the start position
                $strSub = $slice($str, $pos_start, $lengthOf($str));
                if ($strSub === false) {
                    return '';
                }

                return $replacerForSkippedText . \ltrim($strSub, $trimChars);
            }

            $strSub = $slice($str, $pos_start, $pos_end);
            if ($strSub === false) {
                return '';
            }

            return $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // the word is near the beginning (or was not found): keep the head of the text
        $offset = \min($length - 1, (int) self::strlen($str, $encoding));

        $pos_end = $firstBreakFrom($offset);
        if (!$pos_end) {
            return $str;
        }

        $strSub = $slice($str, 0, $pos_end);
        if ($strSub === false) {
            return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
    }
1409
1410
    /**
     * Reads entire file into a string and (optionally) converts the content to clean UTF-8.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read; it is passed through
     *                                        filter_var(FILTER_SANITIZE_STRING) before use.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to file_get_contents() to trigger the
     *                                        include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is non-zero,
     *                                        a context carrying the http timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null means 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data as string or <b>false</b> on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // build a default context that only carries the timeout
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // the length argument may only be passed when it is really set
        $data = \is_int($maxLength) === true
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // binary data that is neither UTF-16 nor UTF-32 is returned untouched
        $isPlainBinary = self::is_binary($data, true) === true
                         &&
                         self::is_utf16($data, false) === false
                         &&
                         self::is_utf32($data, false) === false;

        if (!$isPlainBinary) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1507
1508
    /**
     * Check whether the content of a file starts with a BOM (Byte Order Mark).
     *
     * The whole file is read and handed to string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1527
1528
    /**
     * Normalize a value to UTF-8 in the given normalization form (NFC by default).
     *
     * - arrays and objects are processed element by element (recursively)
     * - strings get normalized line endings (if they contain "\r"), are normalized via \Normalizer
     *   or, if that yields nothing usable, converted with encode('UTF-8', ...)
     * - every other type is returned as it is
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>One of the \Normalizer form constants.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which would
     *                                   otherwise start with a combining mark.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        if ($type === 'array' || $type === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // drop the reference so later writes cannot hit the last element
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // placeholder: marks "normalization was fine" for the check below
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        if (
            $var[0] >= "\x80"
            &&
            isset($normalized[0], $leading_combining[0])
            &&
            \preg_match('/^\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1592
1593
    /**
     * "filter_input()"-wrapper whose result is additionally passed through UTF8::filter().
     *
     * Gets a specific external variable by name and optionally filters it
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              Only forwarded when the caller actually passed a fourth argument.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // only hand over $options if it was really given by the caller
        $value = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($value);
    }
1635
1636
    /**
1637
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1638
     *
1639
     * Gets external variables and optionally filters them
1640
     *
1641
     * @see  http://php.net/manual/en/function.filter-input-array.php
1642
     *
1643
     * @param int   $type       <p>
1644
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1645
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1646
     *                          <b>INPUT_ENV</b>.
1647
     *                          </p>
1648
     * @param mixed $definition [optional] <p>
1649
     *                          An array defining the arguments. A valid key is a string
1650
     *                          containing a variable name and a valid value is either a filter type, or an array
1651
     *                          optionally specifying the filter, flags and options. If the value is an
1652
     *                          array, valid keys are filter which specifies the
1653
     *                          filter type,
1654
     *                          flags which specifies any flags that apply to the
1655
     *                          filter, and options which specifies any options that
1656
     *                          apply to the filter. See the example below for a better understanding.
1657
     *                          </p>
1658
     *                          <p>
1659
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1660
     *                          input array are filtered by this filter.
1661
     *                          </p>
1662
     * @param bool  $add_empty  [optional] <p>
1663
     *                          Add missing keys as <b>NULL</b> to the return value.
1664
     *                          </p>
1665
     *
1666
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1667
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1668
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1669
     *               is not set and <b>NULL</b> if the filter fails.
1670
     */
1671
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // Forward only the arguments the caller really supplied, so the native
        // function applies its own defaults when just $type was given.
        $filtered = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // The native result is handed to self::filter() before it is returned.
        return self::filter($filtered);
    }
1681
1682
    /**
1683
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1684
     *
1685
     * Filters a variable with a specified filter
1686
     *
1687
     * @see  http://php.net/manual/en/function.filter-var.php
1688
     *
1689
     * @param mixed $variable <p>
1690
     *                        Value to filter.
1691
     *                        </p>
1692
     * @param int   $filter   [optional] <p>
1693
     *                        The ID of the filter to apply. The
1694
     *                        manual page lists the available filters.
1695
     *                        </p>
1696
     * @param mixed $options  [optional] <p>
1697
     *                        Associative array of options or bitwise disjunction of flags. If filter
1698
     *                        accepts options, flags can be provided in "flags" field of array. For
1699
     *                        the "callback" filter, callable type should be passed. The
1700
     *                        callback must accept one argument, the value to be filtered, and return
1701
     *                        the value after filtering/sanitizing it.
1702
     *                        </p>
1703
     *                        <p>
1704
     *                        <code>
1705
     *                        // for filters that accept options, use this format
1706
     *                        $options = array(
1707
     *                        'options' => array(
1708
     *                        'default' => 3, // value to return if the filter fails
1709
     *                        // other options here
1710
     *                        'min_range' => 0
1711
     *                        ),
1712
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1713
     *                        );
1714
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1715
     *                        // for filter that only accept flags, you can pass them directly
1716
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1717
     *                        // for filter that only accept flags, you can also pass as an array
1718
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1719
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1720
     *                        // callback validate filter
1721
     *                        function foo($value)
1722
     *                        {
1723
     *                        // Expected format: Surname, GivenNames
1724
     *                        if (strpos($value, ", ") === false) return false;
1725
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1726
     *                        $empty = (empty($surname) || empty($givennames));
1727
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1728
     *                        if ($empty || $notstrings) {
1729
     *                        return false;
1730
     *                        } else {
1731
     *                        return $value;
1732
     *                        }
1733
     *                        }
1734
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1735
     *                        </code>
1736
     *                        </p>
1737
     *
1738
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1739
     */
1740 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Pass $options on only when the caller supplied it explicitly;
        // otherwise let the native function use its own default.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        // The native result is handed to self::filter() before it is returned.
        return self::filter($filtered);
    }
1750
1751
    /**
1752
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1753
     *
1754
     * Gets multiple variables and optionally filters them
1755
     *
1756
     * @see  http://php.net/manual/en/function.filter-var-array.php
1757
     *
1758
     * @param array $data       <p>
1759
     *                          An array with string keys containing the data to filter.
1760
     *                          </p>
1761
     * @param mixed $definition [optional] <p>
1762
     *                          An array defining the arguments. A valid key is a string
1763
     *                          containing a variable name and a valid value is either a
1764
     *                          filter type, or an
1765
     *                          array optionally specifying the filter, flags and options.
1766
     *                          If the value is an array, valid keys are filter
1767
     *                          which specifies the filter type,
1768
     *                          flags which specifies any flags that apply to the
1769
     *                          filter, and options which specifies any options that
1770
     *                          apply to the filter. See the example below for a better understanding.
1771
     *                          </p>
1772
     *                          <p>
1773
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1774
     *                          input array are filtered by this filter.
1775
     *                          </p>
1776
     * @param bool  $add_empty  [optional] <p>
1777
     *                          Add missing keys as <b>NULL</b> to the return value.
1778
     *                          </p>
1779
     *
1780
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1781
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1782
     *               set
1783
     */
1784 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // Forward $definition / $add_empty only when a definition was given,
        // so the native defaults apply for the one-argument call.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        // The native result is handed to self::filter() before it is returned.
        return self::filter($filtered);
    }
1794
1795
    /**
1796
     * Checks whether finfo is available on the server.
1797
     *
1798
     * @return bool
1799
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1800
     */
1801
    public static function finfo_loaded(): bool
    {
        // Availability is decided purely by the presence of the "finfo" class.
        $finfoClassExists = \class_exists('finfo');

        return $finfoClassExists;
    }
1805
1806
    /**
1807
     * Returns the first $n characters of the string.
1808
     *
1809
     * @param string $str      <p>The input string.</p>
1810
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1811
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1812
     *
1813
     * @return string
1814
     */
1815 13
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // Nothing can be taken from an empty string or with a non-positive count.
        $nothingToTake = ($str === '' || $n <= 0);
        if ($nothingToTake) {
            return '';
        }

        // Any charset other than UTF-8 goes through the encoding-aware helper.
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, 0, $n, $encoding);
        }

        // UTF-8: call mb_substr() directly, without an explicit encoding argument.
        return (string) \mb_substr($str, 0, $n);
    }
1827
1828
    /**
1829
     * Check if the number of unicode characters is not more than the specified integer.
1830
     *
1831
     * @param string $str      the original string to be checked
1832
     * @param int    $box_size the size in number of chars to be checked against string
1833
     *
1834
     * @return bool true if string is less than or equal to $box_size, false otherwise
1835
     */
1836 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by self::strlen(), compared against the box size.
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1840
1841
    /**
1842
     * Try to fix simple broken UTF-8 strings.
1843
     *
1844
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1845
     *
1846
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1847
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1848
     * See: http://en.wikipedia.org/wiki/Windows-1252
1849
     *
1850
     * @param string $str <p>The input string</p>
1851
     *
1852
     * @return string
1853
     */
1854 42
    public static function fix_simple_utf8(string $str): string
    {
        // Search / replace lists, built once per request and then reused.
        static $searchCache = null;
        static $replaceCache = null;

        if ($str === '') {
            return '';
        }

        if ($searchCache === null) {
            // Load the replacement table on first use.
            $fixTable = self::$BROKEN_UTF8_FIX;
            if ($fixTable === null) {
                $fixTable = self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // Keys are the broken sequences, values their replacements.
            $searchCache = \array_keys($fixTable);
            $replaceCache = \array_values($fixTable);
        }

        return \str_replace($searchCache, $replaceCache, $str);
    }
1874
1875
    /**
1876
     * Fix a double (or multiple) encoded UTF8 string.
1877
     *
1878
     * @param string|string[] $str you can use a string or an array of strings
1879
     *
1880
     * @return string|string[]
1881
     *                         Will return the fixed input-"array" or
1882
     *                         the fixed input-"string"
1883
     *
1884
     * @psalm-suppress InvalidReturnType
1885
     */
1886 2
    public static function fix_utf8($str)
    {
        // Arrays are fixed element by element (recursively), keeping their keys.
        if (\is_array($str) === true) {
            foreach ($str as &$item) {
                $item = self::fix_utf8($item);
            }
            // Drop the dangling reference left behind by the by-reference loop.
            unset($item);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Decode and re-encode until another pass no longer changes the string.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1917
1918
    /**
1919
     * Get the text direction ('RTL' or 'LTR') of a specific character.
1920
     *
1921
     * @param string $char
1922
     *
1923
     * @return string 'RTL' or 'LTR'
1924
     */
1925 2
    public static function getCharDirection(string $char): string
    {
        // Ask "IntlChar" first when the support table says it is available.
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // Direction codes from the "IntlChar"-Class, grouped by result.
            if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // Any other code falls through to the table lookup below.
        }

        $codePoint = static::chr_to_decimal($char);

        // Everything outside 0x5be..0x10b7f is reported as left-to-right.
        if (!($codePoint >= 0x5be && $codePoint <= 0x10b7f)) {
            return 'LTR';
        }

        if ($codePoint <= 0x85e) {
            // Lower block: individual code points ...
            $rtlSingles = [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ];
            // ... and inclusive [first, last] ranges.
            $rtlRanges = [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ];
        } elseif ($codePoint === 0x200f) {
            return 'RTL';
        } elseif ($codePoint >= 0xfb1d) {
            // Upper block: individual code points ...
            $rtlSingles = [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ];
            // ... and inclusive [first, last] ranges.
            $rtlRanges = [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // Between the two blocks only 0x200f (handled above) is right-to-left.
            return 'LTR';
        }

        // Strict comparison, exactly like a chain of "===" checks.
        if (\in_array($codePoint, $rtlSingles, true)) {
            return 'RTL';
        }

        foreach ($rtlRanges as $range) {
            if ($codePoint >= $range[0] && $codePoint <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2031
2032
    /**
2033
     * Check for php-support.
2034
     *
2035
     * @param string|null $key
2036
     *
2037
     * @return mixed
2038
     *               Return the full support-"array", if $key === null<br>
2039
     *               return bool-value, if $key is used and available<br>
2040
     *               otherwise return <strong>null</strong>
2041
     */
2042 27
    public static function getSupportInfo(string $key = null)
    {
        // Without a key the caller gets the complete support table.
        if ($key === null) {
            return self::$SUPPORT;
        }

        // Unknown keys (and keys holding null) yield null, otherwise the stored value.
        return self::$SUPPORT[$key] ?? null;
    }
2054
2055
    /**
2056
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
2057
     *          if you need more supported types, please use e.g. "finfo"
2058
     *
2059
     * @param string $str
2060
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
2061
     *
2062
     * @return array
2063
     *               with this keys: 'ext', 'mime', 'type'
2064
     */
2065 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // The detection only looks at the first two bytes, so shorter input
        // (including the empty string) cannot be classified.
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // Known two-byte prefixes, keyed by the raw bytes.
        //
        // The raw prefix is compared directly instead of concatenating the two
        // decimal byte values into one number: that number is ambiguous, e.g.
        // the bytes 7 + 173 and 71 + 73 ("GI") both produce "7173", so data
        // starting with "\x07\xAD" used to be reported as a GIF image.
        static $knownPrefixes = [
            '%P'       => ['ext' => 'pdf', 'mime' => 'application/pdf', 'type' => 'binary'],
            'MZ'       => ['ext' => 'exe', 'mime' => 'application/octet-stream', 'type' => 'binary'],
            'MT'       => ['ext' => 'midi', 'mime' => 'audio/x-midi', 'type' => 'binary'],
            'PK'       => ['ext' => 'zip', 'mime' => 'application/zip', 'type' => 'binary'],
            'Ra'       => ['ext' => 'rar', 'mime' => 'application/rar', 'type' => 'binary'],
            "\xff\xd8" => ['ext' => 'jpg', 'mime' => 'image/jpeg', 'type' => 'binary'],
            'GI'       => ['ext' => 'gif', 'mime' => 'image/gif', 'type' => 'binary'],
            'BM'       => ['ext' => 'bmp', 'mime' => 'image/bmp', 'type' => 'binary'],
            "\x89P"    => ['ext' => 'png', 'mime' => 'image/png', 'type' => 'binary'],
        ];

        $prefix = \substr($str, 0, 2);
        if ($prefix === false || !isset($knownPrefixes[$prefix])) {
            return $fallback;
        }

        // Array with the keys 'ext', 'mime' and 'type'.
        return $knownPrefixes[$prefix];
    }
2156
2157
    /**
2158
     * @param int    $length        <p>Length of the random string.</p>
2159
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2160
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2161
     *
2162
     * @return string
2163
     */
2164 1
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        $result = '';
        $generated = 0;

        //
        // non-UTF-8 charsets: use the encoding-aware helpers of this class
        //

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $poolSize = (int) self::strlen($possibleChars, $encoding);
            if ($poolSize === 0) {
                return '';
            }

            while ($generated < $length) {
                try {
                    $index = \random_int(0, $poolSize - 1);
                } catch (\Exception $e) {
                    /** @noinspection RandomApiMigrationInspection */
                    $index = \mt_rand(0, $poolSize - 1);
                }

                $picked = self::substr($possibleChars, $index, 1, $encoding);
                if ($picked === false) {
                    // failed pick: try again without counting it
                    continue;
                }

                $result .= $picked;
                ++$generated;
            }

            return $result;
        }

        //
        // UTF-8: call the "mb_" functions directly
        //

        $poolSize = (int) \mb_strlen($possibleChars);
        if ($poolSize === 0) {
            return '';
        }

        while ($generated < $length) {
            try {
                $index = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $poolSize - 1);
            }

            $picked = \mb_substr($possibleChars, $index, 1);
            if ($picked === false) {
                // failed pick: try again without counting it
                continue;
            }

            $result .= $picked;
            ++$generated;
        }

        return $result;
    }
2218
2219
    /**
2220
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2221
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2222
     *
2223
     * @return string
2224
     */
2225 1
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // random_int() throws when no source of randomness is available; fall
        // back to mt_rand() in that case, exactly like get_random_string() does,
        // instead of letting the exception escape from this helper.
        try {
            $randomPart = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $randomPart = \mt_rand(0, \mt_getrandmax());
        }

        // Mix request-specific values and the caller's extra entropy into the prefix.
        $uniqueHelper = $randomPart .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        $uniqueString = \uniqid($uniqueHelper, true);

        // Optionally condense the identifier into a fixed-length md5 hash.
        if ($md5) {
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2241
2242
    /**
2243
     * alias for "UTF8::string_has_bom()"
2244
     *
2245
     * @see        UTF8::string_has_bom()
2246
     *
2247
     * @param string $str
2248
     *
2249
     * @return bool
2250
     *
2251
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
2252
     */
2253 2
    public static function hasBom(string $str): bool
    {
        // Deprecated alias: the work is done by string_has_bom().
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2257
2258
    /**
2259
     * Returns true if the string contains a lower case char, false otherwise.
2260
     *
2261
     * @param string $str <p>The input string.</p>
2262
     *
2263
     * @return bool whether or not the string contains a lower case character
2264
     */
2265 47
    public static function has_lowercase(string $str): bool
    {
        // Any run of characters followed by one lower case character.
        $pattern = '.*[[:lower:]]';

        // Without mbstring, use the pattern helper of this class instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2274
2275
    /**
2276
     * Returns true if the string contains an upper case char, false otherwise.
2277
     *
2278
     * @param string $str <p>The input string.</p>
2279
     *
2280
     * @return bool whether or not the string contains an upper case character
2281
     */
2282 12
    public static function has_uppercase(string $str): bool
    {
        // Any run of characters followed by one upper case character.
        $pattern = '.*[[:upper:]]';

        // Without mbstring, use the pattern helper of this class instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2291
2292
    /**
2293
     * Converts a hexadecimal-value into an UTF-8 character.
2294
     *
2295
     * @param string $hexdec <p>The hexadecimal value.</p>
2296
     *
2297
     * @return false|string one single UTF-8 character
2298
     */
2299 4
    public static function hex_to_chr(string $hexdec)
    {
        // Convert the hexadecimal notation to a number first ...
        $codePoint = \hexdec($hexdec);

        // ... and let decimal_to_chr() build the character from it.
        return self::decimal_to_chr($codePoint);
    }
2303
2304
    /**
2305
     * Converts hexadecimal U+xxxx code point representation to integer.
2306
     *
2307
     * INFO: opposite to UTF8::int_to_hex()
2308
     *
2309
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
2310
     *
2311
     * @return false|int the code point, or false on failure
2312
     */
2313 2
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Accepted forms: an optional "\u" or "U+" prefix followed by 4 to 6
        // hexadecimal digits. Only real hex digits are allowed here; the class
        // used to be [a-zA-Z0-9], so input such as "zzzz" matched and was
        // returned as 0 by intval() instead of being rejected with false.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match) === 1) {
            return \intval($match[1], 16);
        }

        return false;
    }
2328
2329
    /**
2330
     * alias for "UTF8::html_entity_decode()"
2331
     *
2332
     * @see UTF8::html_entity_decode()
2333
     *
2334
     * @param string $str
2335
     * @param int    $flags
2336
     * @param string $encoding
2337
     *
2338
     * @return string
2339
     */
2340 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Alias: all arguments are passed through to html_entity_decode() unchanged.
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2344
2345
    /**
2346
     * Converts a UTF-8 string to a series of HTML numbered entities.
2347
     *
2348
     * INFO: opposite to UTF8::html_decode()
2349
     *
2350
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2351
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2352
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2353
     *
2354
     * @return string HTML numbered entities
2355
     */
2356 13
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Start at 0x80 to leave the ASCII range untouched, otherwise at 0x00.
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A convmap consists of groups of exactly four integers:
            // [start code, end code, offset, mask]. The map previously carried a
            // fifth, stray element, which PHP 8 rejects with a ValueError
            // ("must have a multiple of 4 elements").
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // Encode character by character and glue the pieces back together.
        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2401
2402
    /**
2403
     * UTF-8 version of html_entity_decode()
2404
     *
2405
     * The reason we are not using html_entity_decode() by itself is because
2406
     * while it is not technically correct to leave out the semicolon
2407
     * at the end of an entity most browsers will still interpret the entity
2408
     * correctly. html_entity_decode() does not convert entities without
2409
     * semicolons, so we are left with our own little solution here. Bummer.
2410
     *
2411
     * Convert all HTML entities to their applicable characters
2412
     *
2413
     * INFO: opposite to UTF8::html_encode()
2414
     *
2415
     * @see http://php.net/manual/en/function.html-entity-decode.php
2416
     *
2417
     * @param string $str      <p>
2418
     *                         The input string.
2419
     *                         </p>
2420
     * @param int    $flags    [optional] <p>
2421
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2422
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2423
     *                         <table>
2424
     *                         Available <i>flags</i> constants
2425
     *                         <tr valign="top">
2426
     *                         <td>Constant Name</td>
2427
     *                         <td>Description</td>
2428
     *                         </tr>
2429
     *                         <tr valign="top">
2430
     *                         <td><b>ENT_COMPAT</b></td>
2431
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2432
     *                         </tr>
2433
     *                         <tr valign="top">
2434
     *                         <td><b>ENT_QUOTES</b></td>
2435
     *                         <td>Will convert both double and single quotes.</td>
2436
     *                         </tr>
2437
     *                         <tr valign="top">
2438
     *                         <td><b>ENT_NOQUOTES</b></td>
2439
     *                         <td>Will leave both double and single quotes unconverted.</td>
2440
     *                         </tr>
2441
     *                         <tr valign="top">
2442
     *                         <td><b>ENT_HTML401</b></td>
2443
     *                         <td>
2444
     *                         Handle code as HTML 4.01.
2445
     *                         </td>
2446
     *                         </tr>
2447
     *                         <tr valign="top">
2448
     *                         <td><b>ENT_XML1</b></td>
2449
     *                         <td>
2450
     *                         Handle code as XML 1.
2451
     *                         </td>
2452
     *                         </tr>
2453
     *                         <tr valign="top">
2454
     *                         <td><b>ENT_XHTML</b></td>
2455
     *                         <td>
2456
     *                         Handle code as XHTML.
2457
     *                         </td>
2458
     *                         </tr>
2459
     *                         <tr valign="top">
2460
     *                         <td><b>ENT_HTML5</b></td>
2461
     *                         <td>
2462
     *                         Handle code as HTML 5.
2463
     *                         </td>
2464
     *                         </tr>
2465
     *                         </table>
2466
     *                         </p>
2467
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2468
     *
2469
     * @return string the decoded string
2470
     */
2471 42
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Fast path: an entity needs at least 4 bytes and an ampersand.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // When no flags are given, decode both quote styles using the HTML5 entity table.
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // mb_decode_numericentity() expects the map as groups of exactly four integers
        // (start, end, offset, mask). A stray fifth element is rejected by PHP 8 with a
        // ValueError, so the map is kept to a single complete group.
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        // Repeat until a pass changes nothing, so that nested / double-encoded
        // entities (e.g. "&amp;lt;") are fully decoded.
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap
                    );
                } else {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap,
                        $encoding
                    );
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // quotes are left encoded here; the html_entity_decode() call below
                        // decides about them according to $flags
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" to unterminated numeric entities first
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2558
2559
    /**
     * Escape a string for HTML output by delegating to "UTF8::htmlspecialchars()".
     *
     * The call always uses ENT_QUOTES | ENT_SUBSTITUTE as flags.
     *
     * @param string $str      <p>The string to escape.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2575
2576
    /**
     * Remove empty html-tags, i.e. an opening tag directly followed (apart from
     * optional whitespace) by a closing tag.
     *
     * e.g.: <tag></tag>
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The string without empty tags; the unchanged input if the
     *                regular expression engine reports an error (e.g. malformed UTF-8).</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/u",
            '',
            $str
        );

        // preg_replace() yields null on a PCRE error (the "u" modifier rejects
        // malformed UTF-8); casting that to string would silently drop the input.
        if ($stripped === null) {
            return $str;
        }

        return $stripped;
    }
2593
2594
    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
     *
     * The string is first passed to PHP's htmlentities(), then every backslash is
     * replaced by "&#92;" and the result is handed to "UTF8::html_encode()".
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str           <p>The input string.</p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of ENT_* flags which specify how to handle quotes, invalid
     *                              code unit sequences and the used document type. Default: ENT_COMPAT.
     *                              </p>
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: convert double-quotes, leave single-quotes alone</li>
     *                              <li><b>ENT_QUOTES</b>: convert both double and single quotes</li>
     *                              <li><b>ENT_NOQUOTES</b>: leave both double and single quotes unconverted</li>
     *                              <li><b>ENT_IGNORE</b>: silently discard invalid code unit sequences
     *                              (discouraged, may have security implications)</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: replace invalid code unit sequences with
     *                              U+FFFD instead of returning an empty string</li>
     *                              <li><b>ENT_DISALLOWED</b>: replace code points that are invalid for the
     *                              given document type with U+FFFD</li>
     *                              <li><b>ENT_HTML401</b>, <b>ENT_XML1</b>, <b>ENT_XHTML</b>,
     *                              <b>ENT_HTML5</b>: handle code as the respective document type</li>
     *                              </ul>
     * @param string $encoding      [optional] <p>
     *                              The encoding used in the conversion. Anything other than "UTF-8" or
     *                              "CP850" is passed through "UTF8::normalize_encoding()" first.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When turned off, PHP will not encode existing html entities.
     *                              The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The encoded string. PHP's htmlentities() returns an empty string for input
     *                with an invalid code unit sequence unless <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> is set.
     *                </p>
     */
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // htmlentities() leaves backslashes untouched, so they are turned into
        // their numeric entity here.
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // NOTE(review): the meaning of the "true" argument is defined by
        // UTF8::html_encode(), which is not visible here — confirm.
        return self::html_encode($encoded, true, $encoding);
    }
2723
2724
    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str           <p>The string being converted.</p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of ENT_* flags which specify how to handle quotes, invalid
     *                              code unit sequences and the used document type. Default: ENT_COMPAT.
     *                              </p>
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: convert double-quotes, leave single-quotes alone</li>
     *                              <li><b>ENT_QUOTES</b>: convert both double and single quotes</li>
     *                              <li><b>ENT_NOQUOTES</b>: leave both double and single quotes unconverted</li>
     *                              <li><b>ENT_IGNORE</b>: silently discard invalid code unit sequences
     *                              (discouraged, may have security implications)</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: replace invalid code unit sequences with
     *                              U+FFFD instead of returning an empty string</li>
     *                              <li><b>ENT_DISALLOWED</b>: replace code points that are invalid for the
     *                              given document type with U+FFFD</li>
     *                              <li><b>ENT_HTML401</b>, <b>ENT_XML1</b>, <b>ENT_XHTML</b>,
     *                              <b>ENT_HTML5</b>: handle code as the respective document type</li>
     *                              </ul>
     * @param string $encoding      [optional] <p>
     *                              The encoding used in the conversion. Anything other than "UTF-8" or
     *                              "CP850" is passed through "UTF8::normalize_encoding()" first.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When turned off, PHP will not encode existing html entities.
     *                              The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The converted string. PHP's htmlspecialchars() returns an empty string for
     *                input with an invalid code unit sequence unless <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> is set.
     *                </p>
     */
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2846
2847
    /**
     * Tells whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2857
2858
    /**
     * Alias of "UTF8::decimal_to_chr()": the argument is forwarded unchanged.
     *
     * @see UTF8::decimal_to_chr()
     *
     * @param mixed $int
     *
     * @return string
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2871
2872
    /**
     * Converts an integer to its hexadecimal code point representation (e.g. "U+00f1").
     *
     * The hexadecimal digits come from dechex() and are left-padded with zeros
     * to at least four digits; longer values are kept as they are.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>The prefix put in front of the hexadecimal digits.</p>
     *
     * @return string the prefix followed by the zero-padded hexadecimal digits
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $digits;
    }
2890
2891
    /**
     * Tells whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $isAvailable = \class_exists('IntlChar');

        return $isAvailable;
    }
2901
2902
    /**
     * Tells whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2912
2913
    /**
     * Deprecated alias that forwards to "UTF8::is_ascii()".
     *
     * @see        UTF8::is_ascii()
     *
     * @param string $str
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $result = self::is_ascii($str);

        return $result;
    }
2928
2929
    /**
     * Deprecated alias that forwards to "UTF8::is_base64()".
     *
     * @see        UTF8::is_base64()
     *
     * @param string $str
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $result = self::is_base64($str);

        return $result;
    }
2944
2945
    /**
     * Deprecated alias that forwards both arguments to "UTF8::is_binary()".
     *
     * @see        UTF8::is_binary()
     *
     * @param mixed $str
     * @param bool  $strict
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $result = self::is_binary($str, $strict);

        return $result;
    }
2961
2962
    /**
     * Deprecated alias that forwards to "UTF8::is_bom()".
     *
     * @see        UTF8::is_bom()
     *
     * @param string $utf8_chr
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $result = self::is_bom($utf8_chr);

        return $result;
    }
2977
2978
    /**
     * Deprecated alias that forwards to "UTF8::is_html()".
     *
     * @see        UTF8::is_html()
     *
     * @param string $str
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $result = self::is_html($str);

        return $result;
    }
2993
2994
    /**
     * Deprecated alias that forwards to "UTF8::is_json()".
     *
     * @see        UTF8::is_json()
     *
     * @param string $str
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $result = self::is_json($str);

        return $result;
    }
3009
3010
    /**
     * Deprecated alias that forwards to "UTF8::is_utf16()".
     *
     * @see        UTF8::is_utf16()
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3028
3029
    /**
     * Deprecated alias that forwards to "UTF8::is_utf32()".
     *
     * @see        UTF8::is_utf32()
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3047
3048
    /**
     * Deprecated alias that forwards both arguments to "UTF8::is_utf8()".
     *
     * @see        UTF8::is_utf8()
     *
     * @param string $str
     * @param bool   $strict
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3064
3065
    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
     *
     * Uses mb_ereg_match() when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" with the same pattern.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3082
3083
    /**
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
     *
     * Uses mb_ereg_match() when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" with the same pattern.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3100
3101
    /**
     * Checks if a string is 7 bit ASCII.
     *
     * Accepted bytes are \x09, \x0A, \x0D, \x10, \x13 and the printable range
     * \x20-\x7E; any other byte makes the check fail. An empty string is ASCII.
     *
     * NOTE(review): \x10 and \x13 look like decimal 10 / 13 written as hex — confirm intent.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        // look for the first byte outside of the accepted set
        $hasOtherBytes = $str !== '' && \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

        return !$hasOtherBytes;
    }
3118
3119
    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * The input counts as base64 when it is a string that strictly decodes and
     * whose decoded value re-encodes to exactly the same string.
     *
     * @param mixed|string $str                <p>The input string.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round-trip: the canonical encoding must match the input byte for byte
        return \base64_encode($decoded) === $str;
    }
3144
3145
    /**
     * Check if the input is binary... (is look like a hack).
     *
     * The input is cast to string and treated as binary if one of these holds, checked in order:
     * it consists only of "0" and "1", "UTF8::get_file_type()" reports the type "binary",
     * more than 25% of its bytes are NUL bytes, or (strict mode only) finfo reports
     * the mime-encoding "binary". An empty string is never binary.
     *
     * @param mixed $input
     * @param bool  $strict <p>Also ask ext-fileinfo for the mime-encoding.</p>
     *
     * @throws \RuntimeException if $strict is true and finfo support is flagged as unavailable
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // a string of only "0" and "1"
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // ratio of NUL bytes
        $length = \strlen($input);
        $nullBytes = \substr_count($input, "\x0", 0, $length);
        if (($nullBytes / $length) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mimeEncoding === 'binary';
    }
3189
3190
    /**
     * Check if the file is binary.
     *
     * Reads up to the first 512 bytes of the file and passes them to
     * "UTF8::is_binary()" in strict mode. Nothing read means "not binary".
     *
     * @param string $file <p>The path handed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $head = '';

        $handle = \fopen($file, 'rb');
        if (\is_resource($handle)) {
            $head = \fread($handle, 512);
            \fclose($handle);
        }

        return $head !== '' && self::is_binary($head, true);
    }
3214
3215
    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
     *
     * Uses mb_ereg_match() when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" with the same pattern.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3232
3233
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is exactly one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // The BOM table is keyed by the BOM byte strings, so a key lookup replaces the former
        // by-reference loop over the static array. The "is_string()" guard keeps the strict
        // comparison semantics: non-string input never equals a BOM.
        return \is_string($str) && \array_key_exists($str, self::$BOM);
    }
3254
3255
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it evaluates to FALSE when converted to a boolean,
     * e.g. "", "0", 0, null, false or an empty array.
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty
     */
    public static function is_empty($str): bool
    {
        // the parameter always exists, so "empty()" boils down to the boolean conversion
        return (bool) $str === false;
    }
3269
3270
    /**
     * Checks whether the string consists of hexadecimal characters only.
     *
     * The whole string is matched against the POSIX class "[[:xdigit:]]"; an empty string
     * matches as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains nothing but hexadecimal chars, <strong>false</strong> otherwise
     */
    public static function is_hexadecimal(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            // fallback without mbstring
            return self::str_matches_pattern($str, '^[[:xdigit:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:xdigit:]]*$', $str);
    }
3287
3288
    /**
     * Check if the string contains at least one html-tag, e.g. "<lall>".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if an opening, closing or self-closing tag was found,
     *              <strong>false</strong> otherwise (also for an empty string)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // one successful match is enough, the matched tag itself is not needed
        $found = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/u", $str);

        return $found === 1;
    }
3308
3309
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via "UTF8::json_decode()" and is accepted when decoding
     * reported no error.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if the json extension is flagged as not available
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a NULL result is only acceptable if the input was the literal "null" (in any case)
        $decodedToUnexpectedNull = $decoded === null && \strtoupper($str) !== 'NULL';
        if ($decodedToUnexpectedNull) {
            return false;
        }

        $isArrayOrObject = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && $isArrayOrObject === false) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3345
3346
    /**
     * Checks whether the string consists of lower case characters only.
     *
     * The whole string is matched against the POSIX class "[[:lower:]]"; an empty string
     * matches as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains nothing but lower case chars, <strong>false</strong> otherwise
     */
    public static function is_lowercase(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            // fallback without mbstring
            return self::str_matches_pattern($str, '^[[:lower:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:lower:]]*$', $str);
    }
3360
3361
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done by actually unserializing the string. Object instantiation is
     * disabled while doing so, so serialized objects are still detected but no class
     * of the application is instantiated from (possibly untrusted) input.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized boolean FALSE, which cannot be told apart
        // from the error return value of "unserialize()"
        if ($str === 'b:0;') {
            return true;
        }

        // "allowed_classes => false" prevents object injection: objects are restored as
        // "__PHP_Incomplete_Class" instead of real instances, the result is still !== false.
        // The silence operator is kept because invalid input triggers a notice / warning.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3380
3381
    /**
     * Checks whether the string consists of upper case characters only.
     *
     * The whole string is matched against the POSIX class "[[:upper:]]"; an empty string
     * matches as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains nothing but upper case chars, <strong>false</strong> otherwise
     */
    public static function is_uppercase(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            // fallback without mbstring
            return self::str_matches_pattern($str, '^[[:upper:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:upper:]]*$', $str);
    }
3399
3400
    /**
     * Check if the string is UTF-16.
     *
     * For both byte orders the string is converted to UTF-8 and back again; if that
     * round trip is stable, the chars of the converted string that also show up in the
     * char list of the input are counted. The byte order with the higher count wins.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>When <b>TRUE</b>, a string that is not detected
     *                                     as binary by "UTF8::is_binary()" is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per byte order
        $maybe = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only a stable round trip is taken into account
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the char list of the input is computed lazily and shared by both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys (the chars) are needed; iterating the key list by value avoids
            // the former by-reference loop over a function result.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$maybe[$encoding];
                }
            }
        }

        if ($maybe['UTF-16BE'] === $maybe['UTF-16LE']) {
            return false;
        }

        return $maybe['UTF-16LE'] > $maybe['UTF-16BE'] ? 1 : 2;
    }
3477
3478
    /**
     * Check if the string is UTF-32.
     *
     * For both byte orders the string is converted to UTF-8 and back again; if that
     * round trip is stable, the chars of the converted string that also show up in the
     * char list of the input are counted. The byte order with the higher count wins.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>When <b>TRUE</b>, a string that is not detected
     *                                     as binary by "UTF8::is_binary()" is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per byte order
        $maybe = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only a stable round trip is taken into account
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the char list of the input is computed lazily and shared by both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys (the chars) are needed; iterating the key list by value avoids
            // the former by-reference loop over a function result.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$maybe[$encoding];
                }
            }
        }

        if ($maybe['UTF-32BE'] === $maybe['UTF-32LE']) {
            return false;
        }

        return $maybe['UTF-32LE'] > $maybe['UTF-32BE'] ? 1 : 2;
    }
3555
3556
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           when <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the key table the emoji values are replaced with
        $emojiKeys = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, (array) $emojiKeys, $str);
    }
3584
3585
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the key table that is replaced by the emoji values
        $emojiKeys = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) $emojiKeys, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
3613
3614
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * Over-long encodings, surrogates, code points above U+10FFFF, 5 and 6 octet sequences and
     * sequences that are cut off (also at the very end of the string) are rejected.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked. For an array, every element has to be valid.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // the elements are only read, so they are iterated by value
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // the byte-wise scan below indexes into the value, so it has to be a string
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this branch is taken when "pcre_utf8_support()" does NOT report true,
        // although it relies on the /u modifier - the condition looks inverted, confirm before changing.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state means the string ended in the middle of a multi-octet
        // sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
        return $mState === 0;
    }
3769
3770
    /**
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" first and is then handed to the
     * native "json_decode()" function.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options. Currently only
     *                        <b>JSON_BIGINT_AS_STRING</b>
     *                        is supported (default is to cast large integers as floats)
     *                        </p>
     *
     * @throws \RuntimeException if the json extension is flagged as not available
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // the input is filtered before the extension check, exactly in this order
        $filteredJson = self::filter($json);

        $jsonIsMissing = self::$SUPPORT['json'] === false;
        if ($jsonIsMissing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filteredJson, $assoc, $depth, $options);
    }
3820
3821
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" first and is then handed to the
     * native "json_encode()" function.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if the json extension is flagged as not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // the value is filtered before the extension check, exactly in this order
        $filteredValue = self::filter($value);

        $jsonIsMissing = self::$SUPPORT['json'] === false;
        if ($jsonIsMissing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filteredValue, $options, $depth);
    }
3870
3871
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is based on the existence of the native "json_decode()" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $jsonDecodeExists = \function_exists('json_decode');

        return $jsonDecodeExists;
    }
3881
3882
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without a language and without $tryToKeepStringLength the native
     * "mb_strtolower()" is used for the first char, in every other case "UTF8::strtolower()".
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $remainder = (string) self::substr($str, 1, null, $encoding);
            $firstChar = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower($firstChar, $encoding, false, $lang, $tryToKeepStringLength) . $remainder;
        }

        $remainder = (string) \mb_substr($str, 1);
        $firstChar = (string) \mb_substr($str, 0, 1);

        // without language specific rules the native function is sufficient
        if ($lang === null && $tryToKeepStringLength === false) {
            return \mb_strtolower($firstChar) . $remainder;
        }

        return self::strtolower($firstChar, $encoding, false, $lang, $tryToKeepStringLength) . $remainder;
    }
3938
3939
    /**
     * Alias of "UTF8::lcfirst()": forwards every argument unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowered;
    }
3961
3962
    /**
     * Lowercases the first character of every word in the string.
     *
     * The string is split via "UTF8::str_to_words()", each non-excluded part is
     * passed to "UTF8::lcfirst()" and the parts are glued back together.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that must be left untouched (strict comparison).</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
     *                                           start a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a loose truthiness check would also
        // throw away the perfectly valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // Write back by key instead of iterating by reference, so no dangling
        // reference to the last element survives the loop.
        foreach ($words as $index => $word) {
            // Empty parts (and "0") have nothing to lowercase.
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
4008
4009
    /**
     * Alias of "UTF8::lcfirst()": forwards every argument unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowered;
    }
4031
4032
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string
     *                           means "strip whitespace".</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit null/'' comparison: a truthiness check would silently ignore
        // the character list "0" and trim whitespace instead.
        if ($chars !== null && $chars !== '') {
            $quotedChars = \preg_quote($chars, '/');
            // "\A" anchors at the very start of the subject in both regex engines
            // used below; "^" may also match after line breaks in the mbstring engine.
            $pattern = '\A[' . $quotedChars . ']+';
        } else {
            $pattern = '\A[\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4060
4061
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null when no code point was found
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\max($codepoints));
    }
4083
4084
    /**
     * Returns the largest value reported by "UTF8::chr_size_list()" for the
     * given string, i.e. the widest character in bytes.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 when the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
4101
4102
    /**
     * Reports whether the "mbstring" extension is loaded in this PHP runtime.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4112
4113
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code point was found
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\min($codepoints));
    }
4135
4136
    /**
     * Alias of "UTF8::normalize_encoding()": forwards both arguments unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4152
4153
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty input -> fallback; a few hard-coded names;
     * the per-request cache; the known-encodings list; finally an alias table
     * that is looked up with the upper-cased name stripped of punctuation.
     * A name that matches nothing is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // Falsy strings ('' and '0') carry no encoding name at all.
        if (!$encoding) {
            return $fallback;
        }

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage (e.g. a bool cast to string)
        if ($encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // Lazy-load the list of known encoding names.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // The cache is keyed by the name exactly as the caller spelled it.
        $requestedName = $encoding;
        $encoding = \strtoupper($encoding);
        // Lookup key for the alias table: upper-case, everything except
        // ASCII letters, digits and whitespace removed.
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/u', '', $encoding);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Every alias target is a non-empty string, so isset() is sufficient here.
        if (isset($aliases[$aliasKey])) {
            $encoding = $aliases[$aliasKey];
        }

        $STATIC_NORMALIZE_ENCODING_CACHE[$requestedName] = $encoding;

        return $encoding;
    }
4296
4297
    /**
     * Converts Windows ("\r\n") and old Mac ("\r") line endings to "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // strtr() tries the longest key first, so "\r\n" collapses into a
        // single "\n" instead of two.
        return \strtr($str, ["\r\n" => "\n", "\r" => "\n"]);
    }
4308
4309
    /**
     * Replaces typographic quotes, dashes and the ellipsis character
     * (as typically produced by MS Word) with plain ASCII equivalents.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );
    }
4360
4361
    /**
     * Replaces every character listed in the class' whitespace table with a
     * plain space and, unless told otherwise, removes the characters listed in
     * the bidirectional-control table.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        if ($str === '') {
            return '';
        }

        // One prepared search list per value of $keepNonBreakingSpace (keys 0 and 1).
        static $WHITESPACE_CACHE = [];
        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $whitespaceTable = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($whitespaceTable['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
        }

        if ($keepBidiUnicodeControls === false) {
            static $BIDI_UNICODE_CONTROLS_CACHE = null;

            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // Bidi controls are dropped, not turned into spaces.
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4402
4403
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-request cache, the preloaded "ord" data table,
     * "IntlChar::ord()" (when supported), and finally manual decoding of the
     * first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 when no byte could be read from the input
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // The encoding may have been normalized to UTF-8 above, so check again.
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                return $CHAR_CACHE[$cacheKey] = $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (up to four) byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or a lead byte without enough continuation bytes)
            $codePoint = $lead;
        }

        return $CHAR_CACHE[$cacheKey] = $codePoint;
    }
4481
4482
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()" this method never places variables in the current scope;
     *          the result is always written to the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Without mbstring fall back to the native parser.
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4514
4515
    /**
     * Checks if the "u" modifier, which enables Unicode support in PCRE, is usable.
     *
     * The check runs an empty pattern with the "u" modifier against an empty
     * subject; errors are deliberately silenced.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchResult = @\preg_match('//u', '');

        return (bool) $matchResult;
    }
4527
4528
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is resolved to a code point in this order: a string of
     * decimal digits is used as-is, a string of hexadecimal digits is
     * converted via "UTF8::hex_to_int()", anything else is passed to
     * "UTF8::ord()". An empty array is returned when a boundary is falsy or
     * resolves to 0.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if the "ctype" extension is not available
     *
     * @return string[]
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Shared boundary resolution for both ends of the range.
        // The ctype_* calls get an explicit string cast, because passing
        // non-string arguments to them is deprecated as of PHP 8.1.
        $toCodePoint = static function ($boundary): int {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit((string) $boundary)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit((string) $boundary)) {
                return (int) self::hex_to_int($boundary);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            static function (int $i): string {
                return (string) self::chr($i);
            },
            \range($start, $end)
        );
    }
4579
4580
    /**
     * Decode html entities & raw-url-encoded sequences and fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: decode repeatedly until the string no longer changes,
     *                             false: decode exactly once.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Fast path: none of the marker characters is present, so only the
        // simple UTF-8 fix is applied.
        if (
            \strpos($str, '&') === false
            &&
            \strpos($str, '%') === false
            &&
            \strpos($str, '+') === false
            &&
            \strpos($str, '\u') === false
        ) {
            return self::fix_simple_utf8($str);
        }

        // Rewrite "%uXXXX" sequences into numeric HTML entities so that the
        // entity decoding below can handle them.
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always decode at least once; previously the string was returned
        // undecoded when $multi_decode was false.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
4644
4645
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in the delimiter and always gets the "u"
     * modifier, followed by the given options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string "msr" is mapped to "ms".
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for a falsy delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4678
4679
    /**
     * Alias of "UTF8::remove_bom()": forwards the argument unchanged.
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4694
4695
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the class' BOM table is tried in turn; each one found at
     * the start of the (already shortened) string is cut off.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = \strlen($str);

        foreach (self::$BOM as $bom => $bomLength) {
            // Only a BOM at offset 0 counts.
            if (\strpos($str, $bom, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $bomLength, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= (int) $bomLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4724
4725
    /**
     * Collapses consecutive repetitions of a string inside another string
     * into a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String(s) whose consecutive repetitions are collapsed.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // Anything that is neither a string nor an array is ignored.
        if (\is_array($what) === false) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($what as $item) {
            // An empty needle cannot have duplicates.
            if ($item === '') {
                continue;
            }

            // Replace with the captured group instead of the raw needle:
            // a needle such as '$0' or '\2' would otherwise be interpreted
            // as a backreference in the replacement string.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
4748
4749
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str
     * @param string $allowableTags [optional] <p>Tags that should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                              Default: empty string (strip all tags)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4763
4764
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A Windows line break ("\r\n") counts as one single break, so it is
     * replaced by exactly one $replacement.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" must be the first alternative, otherwise "\r" and "\n" would be
        // replaced separately. Flags: i = case-insensitive ("<BR>"),
        // s = "." also matches newlines, U = ungreedy (stop at the first ">").
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4776
4777
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * Removed are all control characters except newline (dec 10), carriage
     * return (dec 13) and horizontal tab (dec 09). The replacement is repeated
     * until a pass does not change anything anymore, so sequences that only
     * appear after a previous removal are caught as well.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str
     * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00", "%1F", ...) of these characters.</p>
     * @param string $replacement <p>The string every match is replaced with.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        $patterns = [];

        if ($url_encoded) {
            $patterns = [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
            ];
        }

        $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        $replaced = 0;
        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replaced);
        } while ($replaced !== 0);

        return $str;
    }
4810
4811
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a plain truthiness check would treat
        // the prefix "0" like an empty string and never remove it.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, (int) \mb_strlen($substring));
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4842
4843
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a plain truthiness check would treat
        // the suffix "0" like an empty string and never remove it.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4875
4876
    /**
     * Replaces every occurrence of $search in $str with $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // the case-insensitive variant is handled by the class' own "str_ireplace()"
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4898
4899
    /**
     * Replaces every occurrence of each element of $search in $str.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string (or the list of strings) to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        if (!$caseSensitive) {
            // the case-insensitive variant is handled by the class' own "str_ireplace()"
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4921
4922
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character.</p>
     * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 chars (via "mb_convert_encoding()").</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // an empty replacement is passed on as the keyword "none"
            $substitute = $replacementChar === '' ? 'none' : $replacementChar;

            // temporarily switch the substitute character and restore it afterwards
            $previousSubstitute = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previousSubstitute);
        }

        $search = [
            "\xEF\xBF\xBD",
            '�',
        ];
        $replace = [
            $replacementChar,
            $replacementChar,
        ];

        return \str_replace($search, $replace, $str);
    }
4968
4969
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. If null or empty, whitespace is
     *                           stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare against null / '' explicitly: a plain truthiness check would
        // ignore the char-list "0" and strip whitespace instead.
        if ($chars !== null && $chars !== '') {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4997
4998
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes one "key - value" line per entry of the internal support-table,
     * wrapped in a "pre"-tag.
     */
    public static function showSupport()
    {
        $lines = '';
        foreach (self::$SUPPORT as $feature => $state) {
            $lines .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '<pre>' . $lines . '</pre>';
    }
5010
5011
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity (or an empty string for an empty input)
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // ASCII chars are only passed through on request
        $keepAsIs = $keepAsciiChars === true && self::is_ascii($char) === true;

        return $keepAsIs
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5036
5037
    /**
     * Replaces every run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength <p>Number of spaces which are replaced by one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
5055
5056
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is passed, every element is split on its own and the result
     * is returned under the original keys.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input (empty if $length is lower than 1).</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // written back by key instead of by reference, so that no dangling
            // reference to the last element is left behind
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short input: pick the characters one by one via "mb_substr()"
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer input: let the regex engine split the characters
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand, the lead byte
            // decides how many continuation bytes (10xxxxxx) belong to the character

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            // the callback takes its argument by value: "array_map()" passes
            // values, a by-reference parameter triggers a warning on PHP 8
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
5195
5196
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);

        // drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native "mb_strtoupper()" is only used if no language- or length-handling is requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        /**
         * @param string $text
         * @param bool   $clean
         *
         * @return string
         */
        $upperCase = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($text)
                : \mb_strtoupper($text, $encoding);
        };

        // remove the separators and upper-case the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($upperCase): string {
                return isset($match[1]) ? $upperCase($match[1], false) : '';
            },
            $str
        );

        // upper-case the character that follows a run of digits (the digits themselves are kept)
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($upperCase, $cleanUtf8): string {
                return $upperCase($match[0], $cleanUtf8);
            },
            $str
        );
    }
5281
5282
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the helper is applied once with
     * " " and once with "-" as delimiter.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $spaceSeparated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($spaceSeparated, '-');
    }
5300
5301
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // byte-wise search for the exact match, "mb_stripos()" for the case-insensitive one
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5323
5324
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty $haystack, an empty list of $needles or an empty needle always
     * results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compared against '' explicitly, so that the needle "0" is searched as well
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail, otherwise keep on checking
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5359
5360
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty list of
     * $needles always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // an empty needle can not be searched for, but it must not
            // prevent the remaining needles from being checked
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // the first hit is enough, otherwise keep on checking
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5396
5397
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for "str_delimit()" with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5411
5412
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // with "mbstring" the "mb_ereg"-functions are used, otherwise the "preg"-functions
        $hasMbString = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: mark every inner uppercase letter with a leading dash
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\B(\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lowercase, natively if no special handling was requested
        if ($lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // step 3: collapse every run of dashes, underscores and whitespace into the delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[-_\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5463
5464
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary (UTF-16 / UTF-32), ASCII, UTF-8,
     * "mb_detect_encoding()" and finally a round-trip through "iconv()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            // 1 is mapped to little endian, 2 to big endian
            $utf16 = self::is_utf16($str, false);
            if ($utf16 === 1 || $utf16 === 2) {
                return $utf16 === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            $utf32 = self::is_utf32($str, false);
            if ($utf32 === 1 || $utf32 === 2) {
                return $utf32 === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            // the list of encodings is loaded on first use only
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);

            // the string survived the conversion unchanged, so it is valid in this encoding
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5585
5586
    /**
     * Check if the string ends with the given substring.
     *
     * The comparison is byte-wise and case-sensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // cut as many bytes from the end as the needle is long and compare them
        $needleLength = \strlen($needle);
        $tail = \substr($haystack, -$needleLength);

        return $tail === $needle;
    }
5598
5599
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - byte-wise comparison of the trailing part of $str
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <strong>true</strong> as soon as one entry of $substrings is a suffix of $str,
     *              <strong>false</strong> if none is (or the list is empty)
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is modified here, so a by-reference loop would
        // only force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        // also reached for an empty list
        return false;
    }
5623
5624
    /**
     * Make sure the string starts with $substring; prepend it when it does not.
     *
     * An empty $substring never counts as "already present", the (unchanged)
     * concatenation is returned in that case.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix that has to be present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // the empty check comes first so strpos() is never called with an empty needle
        $alreadyPrefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5645
5646
    /**
     * Make sure the string ends with $substring; append it when it does not.
     *
     * The suffix check is byte-wise and case-sensitive.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix that has to be present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // nothing to do only when both strings are non-empty and the suffix is already there
        if (
            $str !== ''
            &&
            $substring !== ''
            &&
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return $str;
        }

        return $str . $substring;
    }
5668
5669
    /**
     * Turn an identifier-like string into a more readable one.
     *
     * Every '_id' occurrence is removed, the remaining underscores become
     * spaces, surrounding whitespace is trimmed and the result is handed to
     * self::ucfirst().
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' has to go before the remaining underscores are turned into spaces
        $humanized = \str_replace(['_id', '_'], ['', ' '], $str);

        return self::ucfirst(\trim($humanized));
    }
5693
5694
    /**
     * Tell whether $haystack ends with $needle, ignoring case.
     *
     * An empty $haystack or an empty $needle always yields <strong>false</strong>.
     *
     * @param string $haystack <p>The string to inspect.</p>
     * @param string $needle   <p>The suffix to look for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // the tail is cut by byte length of the needle, the comparison itself is done by self::strcasecmp()
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5710
5711
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive (each candidate is checked via self::str_iends_with())
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <strong>true</strong> as soon as one entry of $substrings matches,
     *              <strong>false</strong> if none does (or the list is empty)
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: the entries are only read, so a by-reference loop would
        // just force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        // also reached for an empty list
        return false;
    }
5735
5736
    /**
     * Find the index of the first case-insensitive occurrence of $needle.
     *
     * Thin wrapper: all arguments are forwarded unchanged to self::stripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5762
5763
    /**
     * Find the index of the last case-insensitive occurrence of $needle.
     *
     * Thin wrapper: all arguments are forwarded unchanged to self::strripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5790
5791
    /**
     * Find the index of the first (case-sensitive) occurrence of $needle.
     *
     * Thin wrapper: all arguments are forwarded unchanged to self::strpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5817
5818
    /**
     * Find the index of the last (case-sensitive) occurrence of $needle.
     *
     * Thin wrapper: all arguments are forwarded unchanged to self::strrpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5845
5846
    /**
     * Insert $substring into the string at character position $index.
     *
     * If $index lies beyond the end of the string, the input is returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The (character) index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: go through the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // fast path: plain "mb_*" calls
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
5885
5886
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * The search terms are quoted and turned into "/…/ui" patterns for
     * preg_replace(). Replacements are inserted literally: "$" and "\" have no
     * special meaning, exactly as with str_ireplace().
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       A single needle or an array of needles. Every replacement
     *                       with a search array is performed on the result of the
     *                       previous replacement. An empty needle never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       A single replacement or an array of replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // build one pattern per needle (keys / order are kept)
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // a pattern that can never match: an empty needle replaces nothing
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // preg_replace() interprets "$n", "${n}" and "\n" inside the replacement as
        // back-references; escape "\" and "$" so the replacement is taken literally
        $escape = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        $replace = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replace, $subject, -1, $count);
    }
5930
5931
    /**
     * Replace $search with $replacement if the string starts with it (case-insensitive).
     *
     * Special case: with an empty $search nothing can match and $replacement is
     * appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The prefix to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacement
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // empty needle: the replacement is simply appended (also covers an empty $str)
        if ($search === '') {
            return $str . $replacement;
        }

        // a non-empty needle can never be found in an empty string
        if ($str === '') {
            return '';
        }

        if (\stripos($str, $search) !== 0) {
            return $str;
        }

        // swap the matched prefix (same byte length as $search) for the replacement
        $remainder = \substr($str, \strlen($search));

        return $replacement . $remainder;
    }
5962
5963
    /**
     * Replace $search with $replacement if the string ends with it (case-insensitive).
     *
     * Special case: with an empty $search nothing can match and $replacement is
     * appended to $str instead. If $search is longer than $str, the string is
     * returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The suffix to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacement
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // empty needle: the replacement is simply appended (also covers an empty $str)
        if ($search === '') {
            return $str . $replacement;
        }

        // byte position at which the suffix would have to start
        $offset = \strlen($str) - \strlen($search);

        // The needle is longer than the string (this includes an empty $str): it
        // cannot match, and a negative offset outside of the haystack would make
        // stripos() emit a warning (PHP 7) or throw a ValueError (PHP 8).
        if ($offset < 0) {
            return $str;
        }

        // only $offset itself can match, the remaining haystack is exactly as long as the needle
        if (\stripos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5994
5995
    /**
     * Tell whether $haystack starts with $needle, ignoring case.
     *
     * An empty $haystack or an empty $needle always yields <strong>false</strong>.
     *
     * @param string $haystack <p>The string to inspect.</p>
     * @param string $needle   <p>The prefix to look for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // a prefix is a match found at index 0
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
6011
6012
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive (each candidate is checked via self::str_istarts_with())
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <strong>true</strong> as soon as one entry of $substrings matches,
     *              <strong>false</strong> if none does, the list is empty or $str is empty
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // Iterate by value: the entries are only read, so a by-reference loop would
        // just force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        // also reached for an empty list
        return false;
    }
6040
6041
    /**
     * Gets the substring after the first (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                Everything behind the separator, or an empty string if the
     *                separator is not found or one of the inputs is empty
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // look the separator up with the same encoding that is used for cutting below,
        // otherwise the character offset would not match for non UTF-8 input
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6075
6076
    /**
     * Gets the substring after the last (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                Everything behind the separator, or an empty string if the
     *                separator is not found or one of the inputs is empty
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // look the separator up with the same encoding that is used for cutting below,
        // otherwise the character offset would not match for non UTF-8 input
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6110
6111
    /**
     * Gets the substring before the first (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                Everything in front of the separator, or an empty string if the
     *                separator is not found or one of the inputs is empty
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // look the separator up with the same encoding that is used for cutting below,
        // otherwise the character offset would not match for non UTF-8 input
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6137
6138
    /**
     * Gets the substring before the last (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                Everything in front of the separator, or an empty string if the
     *                separator is not found or one of the inputs is empty
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: plain "mb_*" calls
            $position = \mb_strripos($str, $separator);

            return $position === false ? '' : (string) \mb_substr($str, 0, $position);
        }

        // generic path: encoding-aware helpers of this class
        $position = self::strripos($str, $separator, 0, $encoding);

        return $position === false ? '' : (string) self::substr($str, 0, $position, $encoding);
    }
6169
6170
    /**
     * Gets the part of the string selected by self::stristr() for the first
     * case-insensitive occurrence of "$needle" (the part before it when
     * "$beforeNeedle" is set).
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                An empty string if nothing was found or one of the inputs is empty
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

        // "false" (= not found) is mapped to an empty string
        return $part === false ? '' : $part;
    }
6206
6207
    /**
     * Gets the part of the string selected by self::strrichr() for the last
     * case-insensitive occurrence of "$needle" (the part before it when
     * "$beforeNeedle" is set).
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                An empty string if nothing was found or one of the inputs is empty
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        // "false" (= not found) is mapped to an empty string
        return $part === false ? '' : $part;
    }
6238
6239
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                An empty string if $str is empty or $n is not positive
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // generic path: encoding-aware helper of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // fast path: a negative start counts from the end of the string
        return (string) \mb_substr($str, -$n);
    }
6262
6263
    /**
     * Limit the number of characters in a string.
     *
     * A string that is longer than $length is cut so that the kept part plus
     * $strAddOn is $length characters long. If $strAddOn alone is already as
     * long as (or longer than) $length, nothing of $str is kept and only
     * $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                An empty string if $str is empty or $length is not positive
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never let the kept length drop below zero: a negative length would make
            // mb_substr() cut from the END of the string and return far more than $length.
            $keep = \max(0, $length - (int) \mb_strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamp as above for the generic path
        $keep = \max(0, $length - (int) self::strlen($strAddOn));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6300
6301
    /**
     * Limit the number of characters in a string without cutting the last word in half.
     *
     * A string longer than $length is cut at $length characters and then
     * shortened to the last space inside that part; $strAddOn is appended.
     * If the cut part contains no space, one character less than $length is kept.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                An empty string if $str is empty or $length is not positive
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the limit falls exactly on a space: drop that space and stop there
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            $str = \mb_substr($str, 0, $length);

            // everything in front of the last space == all words but the (possibly cut) last one
            $lastSpace = \strrpos($str, ' ');
            $withoutLastWord = $lastSpace === false ? '' : \substr($str, 0, $lastSpace);

            if ($withoutLastWord !== '') {
                return $withoutLastWord . $strAddOn;
            }

            return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the limit falls exactly on a space: drop that space and stop there
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        $str = self::substr($str, 0, $length, $encoding);
        if ($str === false) {
            return '' . $strAddOn;
        }

        // everything in front of the last space == all words but the (possibly cut) last one
        $lastSpace = \strrpos($str, ' ');
        $withoutLastWord = $lastSpace === false ? '' : \substr($str, 0, $lastSpace);

        if ($withoutLastWord !== '') {
            return $withoutLastWord . $strAddOn;
        }

        return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
    }
6365
6366
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first difference.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            // generic path: encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            for ($pos = 0; $pos < $limit; ++$pos) {
                $char = self::substr($str, $pos, 1, $encoding);

                if ($char === false || $char !== self::substr($otherStr, $pos, 1, $encoding)) {
                    break;
                }

                $prefix .= $char;
            }

            return $prefix;
        }

        // fast path: plain "mb_*" calls; only the shorter length has to be inspected
        $limit = (int) \min(
            \mb_strlen($str),
            \mb_strlen($otherStr)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $char = \mb_substr($str, $pos, 1);

            if ($char === false || $char !== \mb_substr($otherStr, $pos, 1)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
6424
6425
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first in $str.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                The longest common substring, or an empty string if there is none
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // evaluated AFTER the normalization on purpose: an encoding that was
        // normalized to 'UTF-8' takes the plain "mb_*" path as well
        $useMb = $encoding === 'UTF-8';

        // extract the characters of $otherStr once instead of once per table cell
        $otherChars = [];
        for ($j = 1; $j <= $otherLength; ++$j) {
            $otherChars[$j] = $useMb
                ? \mb_substr($otherStr, $j - 1, 1)
                : self::substr($otherStr, $j - 1, 1, $encoding);
        }

        // $table[$i][$j] = length of the common run that ends at char $i of $str and char $j of $otherStr
        $len = 0; // length of the best run found so far
        $end = 0; // index (1-based, in $str) at which that run ends
        $table = \array_fill(
            0,
            $strLength + 1,
            \array_fill(0, $otherLength + 1, 0)
        );

        for ($i = 1; $i <= $strLength; ++$i) {
            // the character of $str is the same for the whole inner loop
            $strChar = $useMb
                ? \mb_substr($str, $i - 1, 1)
                : self::substr($str, $i - 1, 1, $encoding);

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar !== $otherChars[$j]) {
                    // no match: the cell keeps its initial value 0
                    continue;
                }

                $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

                // strictly greater, so the first run wins in case of ties
                if ($table[$i][$j] > $len) {
                    $len = $table[$i][$j];
                    $end = $i;
                }
            }
        }

        if ($useMb) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6509
6510
    /**
     * Returns the longest suffix that $str and $otherStr have in common.
     *
     * The strings are compared character by character, starting at their
     * last character, until the first mismatch.
     *
     * @param string $str      <p>First string for comparison.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the common suffix, or an empty string if there is none
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str, $encoding),
                \mb_strlen($otherStr, $encoding)
            );

            // negative offsets address characters counted from the end
            for ($offset = -1; $offset >= -$limit; --$offset) {
                $current = \mb_substr($str, $offset, 1);

                if ($current === false || $current !== \mb_substr($otherStr, $offset, 1)) {
                    break;
                }

                $suffix = $current . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str, $encoding),
            self::strlen($otherStr, $encoding)
        );

        for ($offset = -1; $offset >= -$limit; --$offset) {
            $current = self::substr($str, $offset, 1, $encoding);

            if ($current === false || $current !== self::substr($otherStr, $offset, 1, $encoding)) {
                break;
            }

            $suffix = $current . $suffix;
        }

        return $suffix;
    }
6571
6572
    /**
     * Checks whether $str matches the given regular expression.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match; 0 (no match) and false (error) both mean "no"
        return \preg_match($regex, $str) === 1;
    }
6584
6585
    /**
     * Tells whether a character exists at the given offset. A negative
     * offset is counted from the last character of the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // -1 addresses the last character, so -$charCount is still a valid offset
        return $offset < 0
            ? $charCount >= \abs($offset)
            : $charCount > $offset;
    }
6607
6608
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check must use the same encoding as the lookup below,
        // otherwise the length and the index refer to different character counts.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6637
6638
    /**
     * Pad a string to the given length (counted in characters) with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string
     * is empty or when the input already has more than $pad_length characters.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of "left", "right", "both"
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the textual aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str);

            if ($pad_length < $str_length) {
                return $str;
            }

            // number of characters that still have to be added
            $missing = $pad_length - $str_length;

            if ($pad_type === \STR_PAD_LEFT) {
                $repeats = (int) \ceil($missing / (int) \mb_strlen($pad_string));

                return (string) \mb_substr(\str_repeat($pad_string, $repeats), 0, $missing) . $str;
            }

            if ($pad_type === \STR_PAD_BOTH) {
                // an odd remainder goes to the right-hand side
                $leftCount = (int) \floor($missing / 2);
                $rightCount = (int) \ceil($missing / 2);

                $left = (string) \mb_substr(\str_repeat($pad_string, $leftCount), 0, $leftCount);
                $right = (string) \mb_substr(\str_repeat($pad_string, $rightCount), 0, $rightCount);

                return $left . $str . $right;
            }

            // STR_PAD_RIGHT and every other integer value
            $repeats = (int) \ceil($missing / (int) \mb_strlen($pad_string));

            return $str . (string) \mb_substr(\str_repeat($pad_string, $repeats), 0, $missing);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $str_length = (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        $missing = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_LEFT) {
            $repeats = (int) \ceil($missing / (int) self::strlen($pad_string, $encoding));

            return (string) self::substr(\str_repeat($pad_string, $repeats), 0, $missing, $encoding) . $str;
        }

        if ($pad_type === \STR_PAD_BOTH) {
            $leftCount = (int) \floor($missing / 2);
            $rightCount = (int) \ceil($missing / 2);

            $left = (string) self::substr(\str_repeat($pad_string, $leftCount), 0, $leftCount, $encoding);
            $right = (string) self::substr(\str_repeat($pad_string, $rightCount), 0, $rightCount, $encoding);

            return $left . $str . $right;
        }

        // STR_PAD_RIGHT and every other integer value
        $repeats = (int) \ceil($missing / (int) self::strlen($pad_string, $encoding));

        return $str . (string) self::substr(\str_repeat($pad_string, $repeats), 0, $missing, $encoding);
    }
6800
6801
    /**
     * Pads both sides of the string up to the given length.
     * Shortcut for str_pad() with STR_PAD_BOTH as pad type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        return self::str_pad(
            $str,
            $length,
            $padStr,
            \STR_PAD_BOTH,
            $encoding
        );
    }
6820
6821
    /**
     * Pads the beginning of the string up to the given length.
     * Shortcut for str_pad() with STR_PAD_LEFT as pad type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        return self::str_pad(
            $str,
            $length,
            $padStr,
            \STR_PAD_LEFT,
            $encoding
        );
    }
6840
6841
    /**
     * Pads the end of the string up to the given length.
     * Shortcut for str_pad() with STR_PAD_RIGHT as pad type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        return self::str_pad(
            $str,
            $length,
            $padStr,
            \STR_PAD_RIGHT,
            $encoding
        );
    }
6860
6861
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>The string to be repeated.</p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6885
6886
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace($search, $replace, $subject, int &$count = null)
    {
        /** @psalm-suppress PossiblyNullArgument */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6924
6925
    /**
     * Replaces $search with $replacement if $str begins with $search.
     *
     * An empty $search never matches a prefix; in that case $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers an empty $str: '' . $replacement === $replacement
            return $str . $replacement;
        }

        $prefixBytes = \strlen($search);

        // byte-wise prefix comparison
        if (\strncmp($str, $search, $prefixBytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefixBytes);
    }
6956
6957
    /**
     * Replaces $search with $replacement if $str ends with $search.
     *
     * An empty $search never matches a suffix; in that case $replacement is
     * appended to $str. A $search that is longer than $str can never match,
     * so $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers an empty $str: '' . $replacement === $replacement
            return $str . $replacement;
        }

        $strBytes = \strlen($str);
        $searchBytes = \strlen($search);

        // Guard first: without it the offset passed to the comparison would point
        // before the start of $str whenever $search is the longer string.
        if ($searchBytes > $strBytes) {
            return $str;
        }

        // compare the last $searchBytes bytes of $str with $search
        if (\substr_compare($str, $search, -$searchBytes) === 0) {
            return \substr($str, 0, $strBytes - $searchBytes) . $replacement;
        }

        return $str;
    }
6988
6989
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged if "$search" is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);

        if ($firstPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $firstPos, $searchLength);
    }
7011
7012
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged if "$search" is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(string $search, string $replace, string $subject): string
    {
        $lastPos = self::strrpos($subject, $search);

        if ($lastPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $lastPos, $searchLength);
    }
7036
7037
    /**
     * Returns the characters of the string in random order.
     *
     * PS: relies on shuffle(), which is too weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $result = '';

        if ($encoding === 'UTF-8') {
            // shuffle the character positions, then collect the characters in that order
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($positions);

            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char === false) {
                    continue;
                }

                $result .= $char;
            }

            return $result;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char === false) {
                continue;
            }

            $result .= $char;
        }

        return $result;
    }
7083
7084
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string if a non-negative
     *                      <i>end</i> is not greater than <i>start</i>.</p>
     *                      <p>Otherwise whatever the underlying substr function returns,
     *                      which may be <b>FALSE</b>.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // a non-negative end at or before the start describes an empty range
        $isEmptyRange = $end !== null && $end >= 0 && $end <= $start;

        if ($encoding === 'UTF-8') {
            if ($isEmptyRange) {
                return '';
            }

            if ($end === null) {
                $sliceLength = (int) \mb_strlen($str);
            } elseif ($end < 0) {
                // negative end: measured from the end of the string
                $sliceLength = (int) \mb_strlen($str) + $end - $start;
            } else {
                $sliceLength = $end - $start;
            }

            return \mb_substr($str, $start, $sliceLength);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($isEmptyRange) {
            return '';
        }

        if ($end === null) {
            $sliceLength = (int) self::strlen($str, $encoding);
        } elseif ($end < 0) {
            $sliceLength = (int) self::strlen($str, $encoding) + $end - $start;
        } else {
            $sliceLength = $end - $start;
        }

        return self::substr($str, $start, $sliceLength, $encoding);
    }
7133
7134
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every uppercase letter is lowercased
     * and prefixed with "_", every digit is wrapped in "_", and repeated
     * "_" are collapsed into one.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // No "|" inside the character class: there it is a literal pipe,
            // not an alternation, and would make "|" characters match as well.
            '/([\d\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // digits are separated on both sides
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                // uppercase letters start a new word
                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/u',        // convert spaces to "_"
                '/^\s+|\s+$/u',  // trim leading & trailing spaces
                '/_+/',         // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7198
7199
    /**
     * Sort all characters according to code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicate values
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        // both sort functions work in place and keep the keys
        $desc ? \arsort($codePoints) : \asort($codePoints);

        return self::string($codePoints);
    }
7224
7225
    /**
     * alias for "UTF8::str_split()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::str_split()
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        return self::str_split(
            $str,
            $length,
            $cleanUtf8
        );
    }
7243
7244
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings; empty if $limit is 0 or the split fails
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() reports a failure with false, which must not leak
            // through the "array" return type
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit parts
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // preg_split() puts the unsplit rest into the last element, so one
        // extra element is requested and dropped again below.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): this fallback quotes $pattern and therefore splits on it
        // literally, while the mbstring branch treats it as a regex - confirm intent.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7305
7306
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle is
     * considered a prefix of every string.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // handled explicitly so that no "empty needle" warning can be raised
        if ($needle === '') {
            return true;
        }

        // compare only the leading bytes instead of searching the whole haystack
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7318
7319
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings;
     *              always false for an empty $str or an empty list
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so no reference is needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7347
7348
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text following the separator; an empty string if $str or
     *                $separator is empty or the separator does not occur
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // go through the class wrapper, exactly like the sibling
        // "str_substr_*_separator()" helpers do for non UTF-8 encodings
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7387
7388
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text following the last separator; an empty string if $str or
     *                $separator is empty or the separator does not occur
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // other encodings are routed through the class wrappers
        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            $start = $position + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $start, null, $encoding);
        }

        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        $start = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }
7427
7428
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text preceding the separator; an empty string if $str or
     *                $separator is empty or the separator does not occur
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // other encodings are routed through the class wrappers
        if ($encoding !== 'UTF-8') {
            $position = self::strpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            return (string) self::substr($str, 0, $position, $encoding);
        }

        $position = \mb_strpos($str, $separator);
        if ($position === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $position);
    }
7471
7472
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text preceding the last separator; an empty string if $str or
     *                $separator is empty or the separator does not occur
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // the encoding name is only normalized for the final substr() call
            $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $position, $normalizedEncoding);
        }

        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $position);
    }
7514
7515
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * INFO: as with "strstr()", the part returned for "$beforeNeedle === false" starts with the needle itself.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the matching part; an empty string if $str or $needle is empty
     *                or the needle was not found
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // passing "false" explicitly is the same as omitting the third argument
            $part = \mb_strstr($str, $needle, $beforeNeedle);
        } else {
            $part = self::strstr($str, $needle, $beforeNeedle, $encoding);
        }

        if ($part === false) {
            return '';
        }

        return $part;
    }
7559
7560
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the matching part; an empty string if $str or $needle is empty
     *                or the needle was not found
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // passing "false" explicitly is the same as omitting the third argument
            $part = \mb_strrchr($str, $needle, $beforeNeedle);
        } else {
            $part = self::strrchr($str, $needle, $beforeNeedle, $encoding);
        }

        if ($part === false) {
            return '';
        }

        return $part;
    }
7604
7605
    /**
     * Wraps $str in the given substring: it is put in front of and behind the string.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7617
7618
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of the word lower-cased. A "word" is every run of non-whitespace
     * characters. Words listed in $ignore are left untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are sufficient as long as no language
        // specific casing and no length preservation was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
            $word = $match[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($useMbFunction !== true) {
                $lowered = self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );

                return self::ucfirst(
                    $lowered,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            if ($encoding === 'UTF-8') {
                $head = \mb_strtoupper(\mb_substr($word, 0, 1));
                $tail = \mb_strtolower(\mb_substr($word, 1));

                return $head . $tail;
            }

            $head = \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding);
            $tail = \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);

            return $head . $tail;
        };

        return (string) \preg_replace_callback('/([\S]+)/u', $titleizeWord, $str);
    }
7693
7694
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * INFO: the entries of $ignore are appended to the built-in list of small
     *       words and joined into the regular expressions without quoting.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // alternation of all "small words" (built-in regex fragments + caller supplied ones)
        $smallWordsRx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * upper-case the first char of the first capture group
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstOfGroupOne = static function (array $matches) use ($encoding): string {
            return self::str_upper_first($matches[1], $encoding);
        };

        /**
         * keep the first capture group and upper-case the first char of the second one
         *
         * @param string[] $matches
         *
         * @return string
         */
        $keepGroupOneUpperGroupTwo = static function (array $matches) use ($encoding): string {
            return $matches[1] . self::str_upper_first($matches[2], $encoding);
        };

        $str = \trim($str);

        // input without any lower-case char (e.g. all caps) is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = self::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // leading and trailing underscores are preserved around the word
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            $keepGroupOneUpperGroupTwo,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            $upperFirstOfGroupOne,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $upperFirstOfGroupOne,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            $keepGroupOneUpperGroupTwo,
            $str
        );

        return $str;
    }
7863
7864
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big-endian number and written
     * with the digits "0" and "1". Leading zeros are not part of the result;
     * an empty string (or a string of NUL bytes only) yields "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        // Convert byte by byte: "base_convert()" works with floats internally
        // for big numbers and would silently lose precision for longer input.
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // same format as a plain number conversion: no leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7877
7878
    /**
     * Split a string into its lines.
     *
     * INFO: the separator is a run of one or two "\r" / "\n" characters, so
     *       e.g. "\r\n" but also "\n\n" counts as a single line break.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "there is nothing to split" (empty input or failed split)
        $nothing = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $lines = \mb_split('[\r\n]{1,2}', $str);
        } else {
            $lines = \preg_split("/[\r\n]{1,2}/u", $str);
        }

        if ($lines === false) {
            return $nothing;
        }

        // only filter when at least one of the two filters was requested
        if ($removeShortValues !== null || $removeEmptyValues !== false) {
            return self::reduce_string_array(
                $lines,
                $removeEmptyValues,
                $removeShortValues
            );
        }

        return $lines;
    }
7916
7917
    /**
     * Convert a string into an array of words.
     *
     * INFO: the words are used as capturing delimiters of the split, so the
     *       unfiltered result contains the words as well as the text between them.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // result for "there is nothing to split" (empty input or failed split)
        $nothing = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        $charList = self::rxClass($charList, '\pL');

        $pieces = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $nothing;
        }

        // without any filter the raw split result is returned
        if ($removeShortValues === null && $removeEmptyValues === false) {
            return $pieces;
        }

        $filtered = self::reduce_string_array(
            $pieces,
            $removeEmptyValues,
            $removeShortValues
        );

        // make sure that every remaining value is a string
        foreach ($filtered as $key => $value) {
            $filtered[$key] = (string) $value;
        }

        return $filtered;
    }
7964
7965
    /**
     * Alias of "UTF8::to_ascii()": every argument is forwarded unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Forwarded to to_ascii(). Default: '?'</p>
     * @param bool   $strict  [optional] <p>Forwarded to to_ascii(). Default: false</p>
     *
     * @return string the result of UTF8::to_ascii() for the same arguments
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7980
7981
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: if $substring alone is already longer than $length, nothing of $str
     *       is kept and only $substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating; $str itself if it is not longer than $length
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the suffix. Clamp at zero: a negative length
                // would be counted from the end of the string and could return
                // (nearly) the whole input instead of a shortened one.
                $length = \max(0, $length - (int) \mb_strlen($substring));

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $length) . $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 branch above
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
            (string) self::substr(
                $str,
                0,
                $length,
                $encoding
            )
        ) . $substring;
    }
8038
8039
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: only the space character (" ") is treated as word boundary.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the substring which gets appended at the end
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            $cut = \mb_substr($str, 0, $length);
            if ($cut === false) {
                return '';
            }

            // a space exactly at index "$length" means that the cut ends on a word boundary
            $nextSpace = \mb_strpos($str, ' ', $length - 1);
            if ($nextSpace !== $length) {
                // otherwise fall back to the last space within the cut part
                $lastSpace = \mb_strrpos($cut, ' ', 0);

                $shortenToLastSpace = $lastSpace !== false
                                      ||
                                      ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);

                if ($shortenToLastSpace) {
                    // "false" (no space in the cut part) is cast to 0 === nothing is kept
                    $cut = (string) \mb_substr($cut, 0, (int) $lastSpace);
                }
            }

            return $cut . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the substring which gets appended at the end
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $cut = self::substr($str, 0, $length, $encoding);
        if ($cut === false) {
            return '';
        }

        // a space exactly at index "$length" means that the cut ends on a word boundary
        $nextSpace = self::strpos($str, ' ', $length - 1, $encoding);
        if ($nextSpace !== $length) {
            // otherwise fall back to the last space within the cut part
            $lastSpace = self::strrpos($cut, ' ', 0, $encoding);

            $shortenToLastSpace = $lastSpace !== false
                                  ||
                                  ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);

            if ($shortenToLastSpace) {
                // "false" (no space in the cut part) is cast to 0 === nothing is kept
                $cut = (string) self::substr($cut, 0, (int) $lastSpace, $encoding);
            }
        }

        return $cut . $substring;
    }
8133
8134
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     *
     * Underscores are inserted before uppercase characters (except for the
     * first character of the string) and replace spaces as well as dashes.
     * The work is delegated to UTF8::str_delimit() with "_" as delimiter.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8148
8149
    /**
     * Returns an UpperCamelCase version of the supplied string.
     *
     * The string is first camelized via UTF8::str_camelize() (only $str and
     * $encoding are forwarded to that step) and then its first character is
     * upper-cased via UTF8::ucfirst().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }
8171
8172
    /**
     * Alias for "UTF8::ucfirst()": every argument is forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Passed through to UTF8::ucfirst().</p>
     * @param string|null $lang                  [optional] <p>Passed through to UTF8::ucfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Passed through to UTF8::ucfirst().</p>
     *
     * @return string
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
8194
8195
    /**
     * Counts number of words in the UTF-8 string.
     *
     * The string is split by UTF8::str_to_words(); this method reads the words
     * from the odd indexes of that result and treats the even indexes as the
     * text between the words.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string, or the list of words (see $format)
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // any format other than 1 or 2 yields the plain word count
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($idx = 1; $idx < $partCount; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: key every word by its character offset in the input
        $position = (int) self::strlen($parts[0]);
        for ($idx = 1; $idx < $partCount; $idx += 2) {
            $words[$position] = $parts[$idx];
            $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
        }

        return $words;
    }
8232
8233
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both inputs are passed
     * through UTF8::strtocasefold() before they are compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8254
8255
    /**
     * Alias for "UTF8::strstr()": every argument is forwarded unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Passed through to UTF8::strstr().</p>
     * @param string $encoding      [optional] <p>Passed through to UTF8::strstr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Passed through to UTF8::strstr().</p>
     *
     * @return false|string
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8277
8278
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings short-circuit to 0. Otherwise both strings are
     * normalized to NFD and compared byte-wise with \strcmp(). If a string
     * cannot be normalized (e.g. it is not valid UTF-8), its raw bytes are
     * compared instead, so the method never passes a non-string to \strcmp().
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() reports failure with a non-string value;
        // under strict_types that would make \strcmp() throw a TypeError.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8296
8297
    /**
     * Find length of initial segment not matching mask.
     *
     * When $offset and/or $length are given, the measurement is done on the
     * corresponding sub-string of $str.
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $charList <p>The characters that end the initial segment.</p>
     * @param int|null $offset   [optional] <p>Start of the sub-string to inspect.</p>
     * @param int|null $length   [optional] <p>Length of the sub-string to inspect.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             the number of characters before the first character of $charList,<br>
     *             the full length if none is found or $charList is empty,<br>
     *             <strong>0</strong> if the (sub-)string is empty or cannot be extracted
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // an empty mask can never match: the whole string is the segment
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        if ($offset !== null || $length !== null) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length !== null) {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset, $length);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // no character of the mask occurs in the string
            return (int) self::strlen($str, $encoding);
        }

        $prefixLength = self::strlen($found[1], $encoding);

        return $prefixLength === false ? 0 : $prefixLength;
    }
8358
8359
    /**
     * Alias for "UTF8::stristr()": every argument is forwarded unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Passed through to UTF8::stristr().</p>
     * @param string $encoding      [optional] <p>Passed through to UTF8::stristr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Passed through to UTF8::stristr().</p>
     *
     * @return false|string
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8381
8382
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every element is converted with UTF8::chr() and the results are
     * concatenated in array order.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $toChar = [self::class, 'chr'];
        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
8404
8405
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is compared against every key of the static BOM table; only
     * the keys (the BOM byte sequences) are needed, so the table is read
     * without taking references to its values.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        foreach (\array_keys(self::$BOM) as $bomString) {
            // array keys may be int-typed in PHP; strpos() needs a string
            if (\strpos($str, (string) $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8425
8426
    /**
     * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>The input string.</p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8462
8463
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * Every run matched by the "[[:space:]]" class (in "u" mode) is removed.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input without whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        return (string) $stripped;
    }
8480
8481
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * Strategy, in order: mbstring, then intl (UTF-8 with non-negative offset
     * only), then native \stripos() for pure ASCII input, and finally a
     * case-folded search via UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // "grapheme_stripos()" can handle neither other encodings nor a negative offset
        $canUseIntl = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
        if ($canUseIntl) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        // fallback for ascii only
        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8556
8557
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case-insensitive search).
     *
     * Strategy, in order: mbstring, then intl (UTF-8 only), then native
     * \stristr() for pure ASCII input, and finally a regex based search.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if cleaning removed all of it.
        // A strict comparison is required: "0" is a valid needle, but falsy.
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        // fallback for ascii only
        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // fallback via regex: group 1 lazily captures everything before the first match
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the captured prefix and return the remainder of the haystack
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8640
8641
    /**
     * Get the string length, not the byte-length!
     *
     * Strategy, in order: mbstring, then \strlen() for "CP850" / "ASCII",
     * then iconv, then intl (UTF-8 only), then \strlen() for pure ASCII
     * input, and finally a regex based character count.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        // preferred backend: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        // binary || ascii only: the byte count is the answer
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        if ($encoding !== 'UTF-8') {
            if (self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
                \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
            }
        }

        // fallback via iconv
        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        // fallback via intl ("grapheme_strlen()" can't handle other encodings)
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        // fallback for ascii only
        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        // fallback via vanilla php: count the matches of "any single character"
        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        // a non-empty string without any match is reported as failure
        return $charCount === 0 ? false : $charCount;
    }
8755
8756
    /**
     * Get string length in byte.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int the number of bytes
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strlen($str, 'CP850'); // 8-BIT
    }
8776
8777
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both inputs are passed
     * through UTF8::strtocasefold() and then compared by UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8798
8799
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Identical strings short-circuit to 0; otherwise both strings are passed
     * through UTF8::strtonatfold() and compared with \strnatcmp().
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
8818
8819
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both inputs are passed through UTF8::strtocasefold() and then compared
     * by UTF8::strncmp(); note that $encoding is only used for the folding.
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8846
8847
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut to their first $len characters and the results are
     * compared by UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head1, $head2);
    }
8882
8883
    /**
     * Search a string for any of a set of characters.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string
     *                      string starting from the character found, or false if it is not found
     *                      (also false if $haystack or $char_list is empty)
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $found)) {
            return false;
        }

        // cut the haystack at the first occurrence of the matched character
        $start = (int) \strpos($haystack, $found[0]);

        return \substr($haystack, $start);
    }
8905
8906
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, native strpos() (for "CP850" / "ASCII"), intl, iconv,
     * native strpos() (if haystack and needle are pure ASCII) and finally a
     * pure-PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
     *                              A negative offset counts from the end of the haystack.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // Translate an offset counted from the end into an absolute one
            // *before* slicing, so the position returned below is relative to
            // the start of the full haystack and not to the slice.
            $offset += (int) self::strlen($haystack, $encoding);
            if ($offset < 0) {
                // offset reaches past the beginning: search the whole string
                $offset = 0;
            }
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position of the needle inside the slice
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character count and re-add the skipped characters
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9053
9054
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or one of the
     *                   strings is empty), it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        // otherwise the native function is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9083
9084
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * INFO: without mbstring only the first character of the needle is used for the lookup.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift
            // the positions reported by "mb_*" / "iconv_*"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1. mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2. binary || ascii only (native strrchr() has no "before needle" mode)
        //

        $isSingleByteCharset = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $isSingleByteCharset) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3. iconv, or vanilla php: both only look for the first character of the needle
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9206
9207
    /**
     * Reverses characters order in the string.
     *
     * For "UTF-8" the string is walked per grapheme if intl is flagged as
     * available, otherwise per "mb_" character. Other encodings are walked
     * via self::strlen() / self::substr().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // NOTE(review): presumably protects emoji from being torn apart while reversing;
        // confirm against emoji_encode() / emoji_decode().
        $str = self::emoji_encode($str, true);

        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($idx = (int) self::strlen($str, $encoding) - 1; $idx >= 0; --$idx) {
                $piece = self::substr($str, $idx, 1, $encoding);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \grapheme_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } else {
            for ($idx = (int) \mb_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \mb_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9259
9260
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * INFO: without mbstring only the first character of the needle is used for the lookup.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift
            // the positions reported by "mb_*" / "iconv_*"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1. mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2. vanilla php: look for the first character of the needle only
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9335
9336
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * Backends are tried in this order: mbstring, native strripos() (for
     * "CP850" / "ASCII"), intl, native strripos() (pure ASCII input) and
     * finally case-folding both strings and delegating to self::strrpos().
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.<br>A non-negative int is converted via self::chr().</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1. mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // 2. binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3. intl ("grapheme_strripos()" can't handle other encodings or a negative offset)
        //

        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // 4. ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // 5. vanilla php: case-fold both strings, then do a case-sensitive lookup
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
9446
9447
    /**
     * Finds the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or one of the
     *                   strings is empty)
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        // otherwise the native function is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9478
9479
    /**
     * Find position of last occurrence of a string in a string.
     *
     * Backends are tried in this order: mbstring, native strrpos() (for
     * "CP850" / "ASCII"), intl, native strrpos() (pure ASCII input) and
     * finally a pure-PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Slice the haystack according to the offset. The requested $encoding is
        // passed on to the helpers, so the slice and the final character count
        // are done in the caller's charset.
        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters; they are re-added to the result below
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // cut off the trailing characters; positions stay relative to the start
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position of the last occurrence inside the slice
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // convert the byte position into a character count
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9616
9617
    /**
     * Find the byte position of the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or one of the
     *                   strings is empty), it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        // otherwise the native function is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9647
9648
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start of the examined part of the string.</p>
     * @param int    $length   [optional] <p>Length of the examined part of the string.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   The length of the matching initial segment; 0 if nothing matches or
     *                   if the (sliced) string or the mask is empty.
     *                   INFO: the code below never returns false.
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Only slice the input if the caller asked for a sub-range.
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // Match the longest run of mask characters anchored at the start of the string.
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9691
9692
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * Backends are tried in this order: mbstring, native strstr() (for
     * "CP850" / "ASCII"), intl, native strstr() (pure ASCII input) and
     * finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift
            // the positions reported by "mb_*" / "iconv_*"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1. mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2. binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3. intl ("grapheme_strstr()" can't handle other encodings)
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($graphemeResult !== false) {
                return $graphemeResult;
            }
        }

        //
        // 4. ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5. vanilla php: lazily capture everything in front of the first needle
        //

        $hit = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $hit);

        if (!isset($hit[1])) {
            return false;
        }

        $prefix = $hit[1];
        if ($before_needle) {
            return $prefix;
        }

        // skip as many characters as the prefix has
        return self::substr($haystack, (int) self::strlen($prefix));
    }
9800
9801
    /**
     * Finds the first occurrence of a string within another, processed in bytes.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found (or if one of the inputs is empty)
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850'); // 8-BIT
    }
9836
9837
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before any case handling is done
            $str = self::clean($str);
        }

        $folded = self::fixStrCaseHelper($str, $lower, $full);
        $toLower = $lower === true;

        // plain UTF-8 without language specific rules: call mbstring directly
        if ($encoding === 'UTF-8' && $lang === null) {
            return $toLower ? \mb_strtolower($folded) : \mb_strtoupper($folded);
        }

        // otherwise delegate to the encoding / language aware helpers
        return $toLower
            ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
    }
9889
9890
    /**
     * Make a string lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the parameter is untyped, so normalize it first
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before converting the case
            $str = self::clean($str);
        }

        if ($tryToKeepStringLength === true) {
            // hack for old php version or for the polyfill ...
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($encoding === 'UTF-8' && $lang === null) {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] !== true) {
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            } else {
                $transliteratorId = $lang . '-Lower';
                $isKnownId = \in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true);
                if (!$isKnownId) {
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // use the language independent transliterator instead
                    $transliteratorId = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($transliteratorId, $str);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
9955
9956
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length; the string is
     *                                           pre-processed via fixStrCaseHelper() in that case</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init (the parameter is untyped, so normalize it first)
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid byte sequences before converting the case
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, only the requested language is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // use the language independent transliterator instead
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10021
10022
    /**
     * Translate characters or replace sub-strings.
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the "from" and "to" character lists cannot be combined
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // translating something into itself changes nothing
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // cut the longer list, so that both lists have the same number of elements
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the lists in their own variables, so that the error message below
            // can still show them if the combination fails
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            $from = $pairs;
        }

        // no "to" given and "from" is a plain string: remove every occurrence of it
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10069
10070
    /**
     * Return the width of a string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        // preferred way: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // vanilla php fallback: the pattern below only works on UTF-8
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all chars matched by the pattern and remember how many there were ...
        $wideCount = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        // ... each stripped char counts twice, every remaining char counts once
        return ($wideCount * 2) + (int) self::strlen($remaining, 'UTF-8');
    }
10120
10121
    /**
     * Get part of a string.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str       <p>The string being checked.</p>
     * @param int      $offset    <p>The first position used in str.</p>
     * @param int|null $length    [optional] <p>The maximum length of the returned string, null === up to the end of
     *                            the string.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p><b>FALSE</b> is returned by the fallback code
     *                      (without mbstring) if the length of the string cannot be determined.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // pass the encoding to mbstring: calling self::substr() again with the very same
            // arguments (as this line did before) never terminates
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ("$length === 0" was already handled above, so only null is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        // no length given: the whole string length is always enough to reach the end
        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10281
10282
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // only cut the strings if a window was requested
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // the second string is limited to the length of the (cut) first string
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // measure the first string in the same encoding that is used to cut the second one
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10335
10336
    /**
     * Count the number of substring occurrences.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack  <p>The string to search in.</p>
     * @param string $needle    <p>The substring to search for.</p>
     * @param int    $offset    [optional] <p>The offset where to start counting.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum length after the specified offset to search for the
     *                          substring.
     *                          </p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int the number of occurrences, or false if one of the inputs is empty
     *                   or if the length of the haystack cannot be determined
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an empty search window cannot contain anything
        if ($length === 0) {
            return 0;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($cleanUtf8 === true) {
            // remove invalid characters from both strings before they are compared
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // limit the haystack to the requested window
        if ($offset !== 0 || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $haystack = $isUtf8
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        $hasMbstring = self::$SUPPORT['mbstring'];

        if (!$isUtf8 && $hasMbstring === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if ($hasMbstring === true) {
            return $isUtf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: collect every match of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10417
10418
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string being found.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The offset where to start counting
     *                         </p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring.
     *                         </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // without "mbstring.func_overload" the native function can be used as it is
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // "mb_substr_count()" has no offset / length parameters, so cut the haystack first
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10495
10496
    /**
     * Returns the number of occurrences of $substring in the given string.
     * The comparison is case-sensitive by default; pass false as $caseSensitive
     * to ignore the case.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$caseSensitive) {
                // bring both strings to the same case before counting
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            // case-fold both strings (uppercase variant) before counting
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10542
10543
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not start with the needle
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to remove from, or nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10567
10568
    /**
     * Get part of a string, processed in bytes (not in characters).
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first (byte-)position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length (in bytes) of the returned string,
     *                         null means "up to the end of the string".</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      bytes long, <b>FALSE</b> will be returned (PHP &lt; 8.0, newer versions
     *                      of the native functions return an empty string instead).</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: read up to the end of the string, without an artificial
        // upper bound that would truncate very large strings
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10599
10600
    /**
     * Strips a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string <p>The $haystack without the trailing $needle, or the unchanged $haystack
     *                if it does not end with $needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything except the last "length of needle" characters
        $charsToKeep = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $charsToKeep);
    }
10624
10625
    /**
     * Strips a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string <p>The $haystack without the leading $needle, or the unchanged $haystack
     *                if it does not start with $needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // cut off as many characters as the needle is long
        $needleLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needleLength);
    }
10649
10650
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is a string
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // nothing to replace in an empty list (the padding below would
            // otherwise produce one bogus element)
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    // non-integer offsets fall back to "0"
                    $valueTmp = \is_int($valueTmp) ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            // NOTE(review): a missing $length is turned into "0" (= insert) for every element,
            // while a missing $length for a single string means "replace up to the end";
            // kept as-is because callers may rely on it - confirm the intended behavior.
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    // non-integer lengths fall back to the number of input strings
                    $valueTmpV2 = \is_int($valueTmpV2) ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call (one per element), the encoding is handed down as well
            return \array_map(
                [self::class, 'substr_replace'],
                $str,
                $replacement,
                $offset,
                $length,
                \array_fill(0, $num, $encoding)
            );
        }

        // a single string can only take a single replacement: use the first one (if any)
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, string length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing / oversized length into a plain character count
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters and splice the arrays
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10800
10801
    /**
     * Strips a suffix ($needle) from the end of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The $haystack without the trailing $needle, or the unchanged $haystack
     *                if it does not end with $needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // the suffix check itself is done on the raw bytes
        if (\substr($haystack, -\strlen($needle)) !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $charsToKeep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $charsToKeep);
        }

        $charsToKeep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $charsToKeep, $encoding);
    }
10842
10843
    /**
     * Returns a case swapped version of the string.
     *
     * <p>Valid, non-ASCII UTF-8 input is swapped character by character, so that case mappings
     * which change the byte length of a character (e.g. U+212A "KELVIN SIGN" -> "k") cannot
     * corrupt the rest of the string.</p>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            // NOTE(review): this byte-wise XOR is only correct while lower-/upper-casing keeps
            // every character at the same byte length; left unchanged for non UTF-8 charsets,
            // because the output encoding of the helpers can't be verified from here.
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        // pure ASCII: every character is one byte, so the byte-wise XOR is exact (and fast)
        if (\preg_match('/[\x80-\xFF]/', $str) !== 1) {
            return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
        }

        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // invalid UTF-8 can't be split into characters: keep the previous byte-wise result
            return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
        }

        // string XOR works byte by byte and truncates to the shortest operand, so it breaks
        // as soon as a case mapping changes the byte length: swap per character instead
        $swapped = '';
        foreach ($chars as $char) {
            $upper = \mb_strtoupper($char);
            $swapped .= $char === $upper ? \mb_strtolower($char) : $upper;
        }

        return $swapped;
    }
10870
10871
    /**
     * Checks whether symfony-polyfills are used.
     *
     * <p>A polyfill is assumed whenever "mb_strlen()" or "iconv()" exists although the
     * matching PHP extension ("mbstring" / "iconv") is not loaded.</p>
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false
                                &&
                                \function_exists('mb_strlen');

        $iconvIsPolyfilled = \extension_loaded('iconv') === false
                             &&
                             \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
10894
10895
    /**
     * Replaces every tab character with $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces per tab. Default: 4</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        // one run of blanks, as wide as a single tab should become
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
10913
10914
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * <p>Without $lang and with $tryToKeepStringLength === false the work is done by
     * "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".</p>
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules / length preservation need the custom implementation
        $needsCustomHandling = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsCustomHandling) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
    }
10951
10952
    /**
     * alias for "UTF8::to_ascii()"
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str       <p>Passed on as the first argument.</p>
     * @param string $subst_chr <p>Passed on as the second argument.</p>
     * @param bool   $strict    <p>Passed on as the third argument.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        // pure delegation
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
10969
10970
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>Passed on unchanged.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        // pure delegation
        $converted = self::to_iso8859($str);

        return $converted;
    }
10985
10986
    /**
     * alias for "UTF8::to_latin1()"
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str <p>Passed on unchanged.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        // pure delegation
        $converted = self::to_latin1($str);

        return $converted;
    }
11001
11002
    /**
     * alias for "UTF8::to_utf8()"
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>Passed on unchanged.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        // pure delegation
        $converted = self::to_utf8($str);

        return $converted;
    }
11017
11018
    /**
     * Convert a string into ASCII.
     *
     * <p>ASCII-only input is returned unchanged. Everything else is cleaned via "UTF8::clean()",
     * optionally transliterated via PHP-Intl and finally mapped character by character with the
     * help of lookup tables (one table per "bank" of 256 code points, loaded on demand).</p>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // lookup tables, cached per bank for the lifetime of the process
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into single characters
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0] ?? [];

        foreach ($chars as &$c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // decode the code point from the lead byte + continuation bytes;
            // start from "null" for every character, so that a value from a
            // previous character can never be reused by accident
            $ord = null;

            if ($ordC0 >= 192 && $ordC0 <= 223) {
                // 2 bytes
                $ord = ($ordC0 - 192) * 64
                       + (self::$ORD[$c[1]] - 128);
            } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
                // 3 bytes
                $ord = ($ordC0 - 224) * 4096
                       + (self::$ORD[$c[1]] - 128) * 64
                       + (self::$ORD[$c[2]] - 128);
            } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
                // 4 bytes
                $ord = ($ordC0 - 240) * 262144
                       + (self::$ORD[$c[1]] - 128) * 4096
                       + (self::$ORD[$c[2]] - 128) * 64
                       + (self::$ORD[$c[3]] - 128);
            } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
                // 5 bytes
                $ord = ($ordC0 - 248) * 16777216
                       + (self::$ORD[$c[1]] - 128) * 262144
                       + (self::$ORD[$c[2]] - 128) * 4096
                       + (self::$ORD[$c[3]] - 128) * 64
                       + (self::$ORD[$c[4]] - 128);
            } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
                // 6 bytes
                $ord = ($ordC0 - 252) * 1073741824
                       + (self::$ORD[$c[1]] - 128) * 16777216
                       + (self::$ORD[$c[2]] - 128) * 262144
                       + (self::$ORD[$c[3]] - 128) * 4096
                       + (self::$ORD[$c[4]] - 128) * 64
                       + (self::$ORD[$c[5]] - 128);
            }

            // no valid lead byte (128-191, 254, 255): nothing we could look up
            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // upper bits select the lookup table ("bank"), the lowest 8 bits the entry in it
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember missing tables as well, so they are not requested again
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }
        // break the reference of the last element
        unset($c);

        return \implode('', $chars);
    }
11178
11179
    /**
     * Interprets a value as boolean.
     *
     * <p>The value is cast to string first: "true", "1", "on", "yes" (in any letter case) are true,
     * "false", "0", "off", "no" are false, numeric strings are true if they are greater than zero
     * and any other string is true unless it is empty (or "0") after trimming.</p>
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $knownValues = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // try the value as given first, then its lower-cased variant
        foreach ([$value, \strtolower($value)] as $candidate) {
            if (isset($knownValues[$candidate])) {
                return $knownValues[$candidate];
            }
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
11220
11221
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * <p>Only "a-z", "A-Z", "0-9", ".", "-" and the $fallback_char survive, whitespace is turned
     * into the $fallback_char, runs of the $fallback_char are collapsed and it is trimmed from
     * both ends of the result.</p>
     *
     * @param string $string
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply removed.
     * @param string $fallback_char     The replacement for whitespace; an empty string removes the
     *                                  whitespace completely.
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        // an empty fallback has nothing to collapse or to trim, and it would
        // turn the third pattern below into the invalid regex "/[]+/u"
        if ($fallback_char === '') {
            return (string) \preg_replace(
                [
                    '/[^\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
                    '/[\s]+/u',             // 2) remove spaces
                ],
                '',
                $string
            );
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $string = (string) \preg_replace(
            [
                '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
                '/[\s]+/u',                                           // 2) convert spaces to $fallback_char
                '/[' . $fallback_char_escaped . ']+/u',               // 3) remove double $fallback_char's
            ],
            [
                '',
                $fallback_char,
                $fallback_char,
            ],
            $string
        );

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11256
11257
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * <p>Arrays are converted element by element (recursively), the keys are kept.</p>
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            $converted = [];
            foreach ($str as $key => $value) {
                $converted[$key] = self::to_iso8859($value);
            }

            return $converted;
        }

        $str = (string) $str;

        // nothing to decode in an empty string
        return $str === '' ? '' : self::utf8_decode($str);
    }
11281
11282
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>Passed on unchanged.</p>
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        // pure delegation
        $converted = self::to_iso8859($str);

        return $converted;
    }
11295
11296
    /**
11297
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
11298
     *
11299
     * <ul>
11300
     * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
11301
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
11302
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
     * case.</li>
11304
     * </ul>
11305
     *
11306
     * @param string|string[] $str                    <p>Any string or array.</p>
11307
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
11308
     *
11309
     * @return string|string[] the UTF-8 encoded string
11310
     */
11311 37
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
11312
    {
11313 37
        if (\is_array($str) === true) {
11314 4
            foreach ($str as $k => &$v) {
11315 4
                $v = self::to_utf8($v, $decodeHtmlEntityToUtf8);
11316
            }
11317
11318 4
            return $str;
11319
        }
11320
11321 37
        $str = (string) $str;
11322 37
        if ($str === '') {
11323 6
            return $str;
11324
        }
11325
11326 37
        $max = \strlen($str);
11327 37
        $buf = '';
11328
11329 37
        for ($i = 0; $i < $max; ++$i) {
11330 37
            $c1 = $str[$i];
11331
11332 37
            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already
11333
11334 33
                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8
11335
11336 30
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11337
11338 30
                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
11339 16
                        $buf .= $c1 . $c2;
11340 16
                        ++$i;
11341
                    } else { // not valid UTF8 - convert it
11342 30
                        $buf .= self::to_utf8_convert_helper($c1);
11343
                    }
11344 33
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8
11345
11346 32
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11347 32
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11348
11349 32
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
11350 14
                        $buf .= $c1 . $c2 . $c3;
11351 14
                        $i += 2;
11352
                    } else { // not valid UTF8 - convert it
11353 32
                        $buf .= self::to_utf8_convert_helper($c1);
11354
                    }
11355 25
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8
11356
11357 25
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11358 25
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11359 25
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
11360
11361 25
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
11362 8
                        $buf .= $c1 . $c2 . $c3 . $c4;
11363 8
                        $i += 3;
11364
                    } else { // not valid UTF8 - convert it
11365 25
                        $buf .= self::to_utf8_convert_helper($c1);
11366
                    }
11367
                } else { // doesn't look like UTF8, but should be converted
11368
11369 33
                    $buf .= self::to_utf8_convert_helper($c1);
11370
                }
11371 34
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion
11372
11373 3
                $buf .= self::to_utf8_convert_helper($c1);
11374
            } else { // it doesn't need conversion
11375
11376 34
                $buf .= $c1;
11377
            }
11378
        }
11379
11380
        // decode unicode escape sequences + unicode surrogate pairs
11381 37
        $buf = \preg_replace_callback(
11382 37
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
11383
            /**
11384
             * @param array $matches
11385
             *
11386
             * @return string
11387
             */
11388
            static function (array $matches): string {
11389 9
                if (isset($matches[3])) {
11390 9
                    $cp = (int) \hexdec($matches[3]);
11391
                } else {
11392
                    // http://unicode.org/faq/utf_bom.html#utf16-4
11393
                    $cp = ((int) \hexdec($matches[1]) << 10)
11394
                          + (int) \hexdec($matches[2])
11395
                          + 0x10000
11396
                          - (0xD800 << 10)
11397
                          - 0xDC00;
11398
                }
11399
11400
                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
11401
                //
11402
                // php_utf32_utf8(unsigned char *buf, unsigned k)
11403
11404 9
                if ($cp < 0x80) {
11405 7
                    return (string) self::chr($cp);
11406
                }
11407
11408 6
                if ($cp < 0xA0) {
11409
                    /** @noinspection UnnecessaryCastingInspection */
11410
                    return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
11411
                }
11412
11413 6
                return self::decimal_to_chr($cp);
11414 37
            },
11415 37
            $buf
11416
        );
11417
11418 37
        if ($buf === null) {
11419
            return '';
11420
        }
11421
11422
        // decode UTF-8 codepoints
11423 37
        if ($decodeHtmlEntityToUtf8 === true) {
11424 2
            $buf = self::html_entity_decode($buf);
11425
        }
11426
11427 37
        return $buf;
11428
    }
11429
11430
    /**
11431
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
11432
     *
11433
     * INFO: This is slower than "trim()"
11434
     *
11435
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
11436
     * but the check for ASCII (7-bit) costs more time than we can save here.
11437
     *
11438
     * @param string      $str   <p>The string to be trimmed</p>
11439
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
11440
     *
11441
     * @return string the trimmed string
11442
     */
11443 55
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly against null / '': a plain truthiness check would
        // treat the char-list "0" as "not given" and silently fall back to
        // whitespace trimming.
        if ($chars !== null && $chars !== '') {
            // escape the char-list so it is safe inside a character class
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = "^[\s]+|[\s]+\$";
        }

        // prefer the mbstring regex engine when the extension is flagged as available
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback: delegate to the class' own regex helper with "/" as delimiter
        return self::regex_replace($str, $pattern, '', '', '/');
    }
11463
11464
    /**
11465
     * Makes string's first char uppercase.
11466
     *
11467
     * @param string      $str                   <p>The input string.</p>
11468
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
11469
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
11470
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11471
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
11472
     *
11473
     * @return string the resulting string
11474
     */
11475 69
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // invalid byte sequences make the multibyte position functions
            // report wrong offsets, so strip them before slicing
            $str = self::clean($str);
        }

        // language-specific or length-preserving casing needs the class' own
        // "strtoupper()"; everything else can use plain mbstring
        $needsCustomCasing = $lang !== null || $tryToKeepStringLength !== false;

        // decided before the encoding gets normalized, exactly one of the two
        // code paths below is taken
        $isPlainUtf8 = $encoding === 'UTF-8';

        if ($isPlainUtf8) {
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = $needsCustomCasing
                ? (string) self::substr($str, 0, 1, $encoding)
                : (string) \mb_substr($str, 0, 1, $encoding);
        }

        if ($needsCustomCasing) {
            return self::strtoupper(
                $head,
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            ) . $tail;
        }

        if ($isPlainUtf8) {
            // no explicit encoding here: relies on the mbstring internal encoding
            return \mb_strtoupper($head) . $tail;
        }

        return \mb_strtoupper($head, $encoding) . $tail;
    }
11533
11534
    /**
11535
     * alias for "UTF8::ucfirst()"
11536
     *
11537
     * @see UTF8::ucfirst()
11538
     *
11539
     * @param string $str
11540
     * @param string $encoding
11541
     * @param bool   $cleanUtf8
11542
     *
11543
     * @return string
11544
     */
11545 1
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // pure alias: hand all arguments through to "ucfirst()" unchanged
        $capitalized = self::ucfirst($str, $encoding, $cleanUtf8);

        return $capitalized;
    }
11549
11550
    /**
11551
     * Uppercase for all words in the string.
11552
     *
11553
     * @param string   $str        <p>The input string.</p>
11554
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
11555
     * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new
11556
     *                             word.</p>
11557
     * @param string   $encoding   [optional] <p>Set the charset.</p>
11558
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
11559
     *
11560
     * @return string
11561
     */
11562 8
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // Only the empty string short-circuits. A truthiness check would also
        // swallow the valid input "0" and return '' for it.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // the native function is only usable when neither a char-list nor any
        // (non-empty) exception was passed
        $usePhpDefaultFunctions = !(bool) ($charlist . \implode('', $exceptions));

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // skip empty chunks (a "0" chunk is skipped too, which is harmless:
            // it has no upper-case form)
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference so later writes to $word cannot touch $words
        unset($word);

        return \implode('', $words);
    }
11611
11612
    /**
11613
     * Multi decode html entity & fix urlencoded-win1252-chars.
11614
     *
11615
     * e.g:
11616
     * 'test+test'                     => 'test test'
11617
     * 'D&#252;sseldorf'               => 'Düsseldorf'
11618
     * 'D%FCsseldorf'                  => 'Düsseldorf'
11619
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
11620
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
11621
     * 'Düsseldorf'                   => 'Düsseldorf'
11622
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
11623
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
11624
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
11625
     *
11626
     * @param string $str          <p>The input string.</p>
11627
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
11628
     *
11629
     * @return string
11630
     */
11631 2
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // fast path: nothing that looks url-encoded, entity-encoded or
        // unicode-escaped -> only run the simple UTF-8 fix
        if (
            \strpos($str, '&') === false
            &&
            \strpos($str, '%') === false
            &&
            \strpos($str, '+') === false
            &&
            \strpos($str, '\u') === false
        ) {
            return self::fix_simple_utf8($str);
        }

        // rewrite "%uXXXX" sequences into numeric html-entities, so that the
        // entity decoder below can resolve them
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always decode at least once. Previously the decoding pass was skipped
        // completely when $multi_decode was false, so e.g. "D%FCsseldorf" came
        // back still encoded. With $multi_decode the pass is repeated until the
        // string no longer changes.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
11676
11677
    /**
11678
     * Returns an array with "urlencoded"-win1252 -> UTF-8
11679
     *
11680
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
11681
     *
11682
     * @return string[]
11683
     */
11684 2
    public static function urldecode_fix_win1252_chars(): array
    {
        // %20 - %7E: printable ASCII, the decoded value is simply the byte itself
        $table = [];
        for ($byte = 0x20; $byte <= 0x7E; ++$byte) {
            $table[\sprintf('%%%02X', $byte)] = \chr($byte);
        }

        // %7F - %FF: Windows-1252 specific part, spelled out explicitly.
        //
        // NOTE(review): the entries for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD
        // are kept as empty strings as found in this table - confirm whether they
        // were meant to hold (invisible) control / no-break-space / soft-hyphen
        // characters.
        return $table + [
            '%7F' => '',
            // 0x80 is the euro sign in Windows-1252 (it was mapped to a backtick
            // before, which is already the value of "%60")
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
11913
11914
    /**
11915
     * Decodes an UTF-8 string to ISO-8859-1.
11916
     *
11917
     * @param string $str           <p>The input string.</p>
11918
     * @param bool   $keepUtf8Chars
11919
     *
11920
     * @return string
11921
     */
11922 14
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // keep the untouched input for the "$keepUtf8Chars" comparison at the end
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the lookup tables used for the 2-byte conversion below
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // placeholder for everything that cannot be written as a single byte
        $noCharFound = '?';

        // The string is rewritten in place: $i is the read position, $j the write
        // position; $j never overtakes $i.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // lead byte of a 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated sequence at the very end of the input:
                        // emit the placeholder instead of reading past the string
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one extra byte, then share the 3-byte handling
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: never fits into one byte
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    // every other byte is copied as-is
                    $str[$j] = $str[$i];
            }
        }

        // cut off the leftover bytes behind the write position
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        // hand back the original if the decoded result did not get shorter
        // (measured with the class' own "strlen()")
        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
11983
11984
    /**
11985
     * Encodes an ISO-8859-1 string to UTF-8.
11986
     *
11987
     * @param string $str <p>The input string.</p>
11988
     *
11989
     * @return string
11990
     */
11991 14
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // a polyfill implementation may hand back false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        // without a "\xC2" byte there is nothing the replacement table could match
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        // search / replace lists are built once per request and then reused
        static $search = null;
        static $replace = null;

        if ($search === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $search = \array_keys(self::$WIN1252_TO_UTF8);
            $replace = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($search, $replace, $encoded);
    }
12024
12025
    /**
12026
     * fix -> utf8-win1252 chars
12027
     *
12028
     * @param string $str <p>The input string.</p>
12029
     *
12030
     * @return string
12031
     *
12032
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
12033
     */
12034 2
    public static function utf8_fix_win1252_chars(string $str): string
    {
        // deprecated alias: the actual work is done by "fix_simple_utf8()"
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12038
12039
    /**
12040
     * Returns an array with all utf8 whitespace characters.
12041
     *
12042
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
12043
     *
12044
     * @author: Derek E. [email protected]
12045
     *
12046
     * @return string[]
12047
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
12048
     *                  as defined in above URL
12049
     */
12050 2
    public static function whitespace_table(): array
    {
        // plain accessor for the static whitespace lookup table
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12054
12055
    /**
12056
     * Limit the number of words in a string.
12057
     *
12058
     * @param string $str      <p>The input string.</p>
12059
     * @param int    $limit    <p>The limit of words as integer.</p>
12060
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
12061
     *
12062
     * @return string
12063
     */
12064 2
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // grab leading whitespace plus at most $limit "word + trailing whitespace" groups
        $regex = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($regex, $str, $found);

        // something was cut off only if the match is shorter than the input
        $wasShortened = isset($found[0])
                        &&
                        \mb_strlen($str) !== (int) \mb_strlen($found[0]);

        if ($wasShortened) {
            return \rtrim($found[0]) . $strAddOn;
        }

        return $str;
    }
12082
12083
    /**
12084
     * Wraps a string to a given number of characters
12085
     *
12086
     * @see  http://php.net/manual/en/function.wordwrap.php
12087
     *
12088
     * @param string $str   <p>The input string.</p>
12089
     * @param int    $width [optional] <p>The column width.</p>
12090
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
12091
     * @param bool   $cut   [optional] <p>
12092
     *                      If the cut is set to true, the string is
12093
     *                      always wrapped at or before the specified width. So if you have
12094
     *                      a word that is larger than the given width, it is broken apart.
12095
     *                      </p>
12096
     *
12097
     * @return string
12098
     *                <p>The given string wrapped at the specified column.</p>
12099
     */
12100 10
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // Build two parallel views of the input:
        //  - $glyphs: the real characters (an existing $break counts as one entry)
        //  - $mask:   a single-byte stand-in per entry ("#" = break, " " = space,
        //             "?" = anything else) that the native wordwrap() can handle
        $glyphs = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex !== 0) {
                $glyphs[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $glyph) {
                $glyphs[] = $glyph;
                $mask .= $glyph === ' ' ? ' ' : '?';
            }
        }

        // let the native function decide where the breaks go
        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $glyphPos = 0;
        $maskPos = -1;
        $hashPos = -1;

        // walk from "#" to "#" in the wrapped mask and replay the real characters
        while (false !== $hashPos = \mb_strpos($mask, '#', $hashPos + 1)) {
            for (++$maskPos; $maskPos < $hashPos; ++$maskPos) {
                $wrapped .= $glyphs[$glyphPos];
                unset($glyphs[$glyphPos]);
                ++$glyphPos;
            }

            // the "#" replaced an original break or a space: drop that entry
            if (
                $break === $glyphs[$glyphPos]
                ||
                $glyphs[$glyphPos] === ' '
            ) {
                unset($glyphs[$glyphPos]);
                ++$glyphPos;
            }

            $wrapped .= $break;
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $glyphs);
    }
12153
12154
    /**
12155
     * Line-Wrap the string after $limit, but also after the next word.
12156
     *
12157
     * @param string $str
12158
     * @param int    $limit
12159
     *
12160
     * @return string
12161
     */
12162 1
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        // split on any of the usual line endings
        $lines = (array) \preg_split('/\\r\\n|\\r|\\n/', $str);

        $wrapped = '';
        foreach ($lines as $line) {
            // a failed preg_split() shows up as a single "false" entry
            if ($line !== false) {
                // every processed line is terminated with "\n", including the last one
                $wrapped .= \wordwrap($line, $limit) . "\n";
            }
        }

        return $wrapped;
    }
12178
12179
    /**
12180
     * Returns an array of Unicode White Space characters.
12181
     *
12182
     * @return string[] an array with numeric code point as key and White Space Character as value
12183
     */
12184 2
    public static function ws(): array
    {
        // plain accessor for the static white-space list
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12188
12189 9
    private static function initEmojiData()
    {
        // the caches are built only once
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the table by key length, longest keys first
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder token per key, built from the key's crc32 checksum
        // and the reversed checksum
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = (string) \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }
    }
12212
12213
    /**
12214
     * @param string $str
12215
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
12216
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
12217
     *
12218
     * @return string
12219
     */
12220 33
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        // only a strict "true" switches to lower-casing
        $toLower = $useLower === true;

        $commonUpper = self::$COMMON_CASE_FOLD['upper'];
        $commonLower = self::$COMMON_CASE_FOLD['lower'];

        // first pass: the common case-folding pairs
        $str = $toLower
            ? \str_replace($commonUpper, $commonLower, $str)
            : \str_replace($commonLower, $commonUpper, $str);

        // second pass (optional): the full case-folding table, loaded on first use
        if ($fullCaseFold) {
            static $FULL_CASE_FOLD = null;
            if ($FULL_CASE_FOLD === null) {
                $FULL_CASE_FOLD = self::getData('caseFolding_full');
            }

            $str = $toLower
                ? \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str)
                : \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
        }

        return $str;
    }
12254
12255
    /**
12256
     * get data from "/data/*.php"
12257
     *
12258
     * @param string $file
12259
     *
12260
     * @return mixed
12261
     */
12262 5
    private static function getData(string $file)
    {
        // the data file's own return value is the result
        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        $data = include __DIR__ . '/data/' . $file . '.php';

        return $data;
    }
12269
12270
    /**
12271
     * get data from "/data/*.php"
12272
     *
12273
     * @param string $file
12274
     *
12275
     * @return false|mixed will return false on error
12276
     */
12277 9
    private static function getDataIfExists(string $file)
    {
        $file = __DIR__ . '/data/' . $file . '.php';

        // a missing data file is reported as "false"
        if (!\file_exists($file)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $file;
    }
12288
12289
    /**
12290
     * Checks whether mbstring "overloaded" is active on the server.
12291
     *
12292
     * @return bool
12293
     */
12294
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing to test the ini flags against
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        // overloaded if the string-function bit is set
        return ($overloadFlags & \MB_OVERLOAD_STRING) !== 0;
    }
12306
12307
    /**
12308
     * @param array $strings
12309
     * @param bool  $removeEmptyValues
12310
     * @param int   $removeShortValues
12311
     *
12312
     * @return array
12313
     */
12314 2
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = \array_filter(
            $strings,
            static function ($candidate) use ($removeEmptyValues, $removeShortValues): bool {
                // drop values that are not longer than the given length
                if (
                    $removeShortValues !== null
                    &&
                    \mb_strlen($candidate) <= $removeShortValues
                ) {
                    return false;
                }

                // drop values that consist of (trim-able) whitespace only
                return !($removeEmptyValues === true && \trim($candidate) === '');
            }
        );

        // re-index, so the result is a plain list
        return \array_values($kept);
    }
12341
12342
    /**
12343
     * rxClass
12344
     *
12345
     * @param string $s
12346
     * @param string $class
12347
     *
12348
     * @return string
12349
     */
12350 33
    private static function rxClass(string $s, string $class = ''): string
    {
        // results are memoized per "$s . $class" combination
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        // index 0 collects the content of the character class,
        // further entries become separate alternatives
        $classArray = [$class];

        // Iterate by value with a dedicated variable: the former by-reference
        // loop over the temporary result of "str_split()" had nothing to refer
        // to and overwrote the $s parameter on the way.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" has to be the first char of the class
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // at most two bytes: quote it and add it to the class
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single multi-byte character: add it to the class unquoted
                $classArray[0] .= $char;
            } else {
                // more than one character: needs its own alternative
                $classArray[] = $char;
            }
        }

        if ($classArray[0]) {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12390
12391
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every part, except for
     * parts that are a known particle (e.g. "von", "de") or that start with a known prefix
     * (e.g. "mc", "d'"). When the delimiter is "-", parts that merely start with a known
     * particle are left untouched as well.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  <p>Encoding that is passed on to the prefix comparison.</p>
     *
     * @return string <p>The re-joined names, or an empty string if $delimiter is empty.</p>
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // explode() cannot split on an empty delimiter (warning + false on PHP 7, ValueError on PHP 8),
        // so bail out with the same empty result on every PHP version
        if ($delimiter === '') {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // the list of beginnings that block capitalisation does not change per name,
        // so build it once; for "-" the particles count as beginnings as well
        $blockingBeginnings = $specialCases['prefixes'];
        if ($delimiter === '-') {
            $blockingBeginnings = \array_merge($specialCases['names'], $blockingBeginnings);
        }

        $namesArray = \explode($delimiter, $names);

        foreach ($namesArray as &$name) {
            // exact particle: keep as typed
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            foreach ($blockingBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // one match is enough, go on with the next name
                    continue 2;
                }
            }

            // NOTE(review): $encoding is not forwarded to ucfirst() here - confirm whether that is intended
            $name = self::ucfirst($name);
        }
        // break the reference so later writes to $name cannot alter the last array element
        unset($name);

        return \implode($delimiter, $namesArray);
    }
12481
12482
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and every run of
     * non-spacing marks (\p{Mn}) is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>The folded string, an empty string if the input could not be normalized,
     *                     or null if the regular-expression replacement failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure (e.g. invalid UTF-8) with false; handing that to
        // preg_replace() would raise a TypeError because this file uses strict_types
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12497
12498
    /**
     * Convert one input unit into its UTF-8 byte sequence.
     *
     * The lookup tables (self::$ORD, self::$CHR, self::$WIN1252_TO_UTF8) are loaded lazily
     * via self::getData() on first use. If the ordinal of $input has an entry in the
     * Windows-1252 table, that entry is returned; otherwise a two-byte sequence is
     * assembled from the ordinal.
     *
     * @param int|string $input <p>Key into self::$ORD; presumably a single byte - TODO confirm against callers.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The bitwise operators below are intentional: applied to two strings they
            // work byte-wise on the characters, they are not boolean operators.
            //
            // Lead byte: the ordinal divided by 64, OR-ed with 0xC0. The division yields a float,
            // so cast it explicitly - a float array key is an implicit lossy conversion (deprecated in PHP 8.1).
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // Second byte: the low six bits of the input, OR-ed with 0x80.
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12531
}
12532