Passed
Push — master ( 510775...fad941 )
by Lars
03:41
created

UTF8::str_isubstr_after_last_separator()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 23
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 6.1384

Importance

Changes 0
Metric Value
cc 5
eloc 14
nc 4
nop 3
dl 0
loc 23
ccs 9
cts 14
cp 0.6429
crap 6.1384
rs 9.4888
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Byte order marks mapped to their length in bytes.
     *
     * Every BOM is listed twice: once as its raw byte sequence and once as the
     * UTF-8 bytes that result when the raw bytes were misread as "WINDOWS-1252"
     * text (one such character may take more than one byte). All keys are written
     * as byte escapes so that editors or transcoding tools cannot silently alter
     * or drop them.
     *
     * The order is significant for prefix matching: the 4-byte UTF-32 marks are
     * listed before the 2-byte UTF-16 marks they start with.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $ENCODINGS;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ORD;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $EMOJI;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI_VALUES_CACHE;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_KEYS_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $CHR;
234
235
    /**
236
     * No-op constructor: it takes no arguments and performs no initialization.
237
     */
238 32
    public function __construct()
239
    {
240 32
    }
241
242
    /**
     * Fetch a single character by its position, similar to $str[1] but multi-byte aware.
     *
     * An empty input string or a negative position yields an empty string.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The zero-based position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing to look up in an empty string; negative positions are rejected here
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other charset through the class' own substr()
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
263
264
    /**
     * Make sure the string starts with a byte order mark.
     *
     * INFO: If the string already has a BOM (as reported by UTF8::string_has_bom()),
     *       the input is returned unchanged; otherwise the UTF-8 BOM is prepended.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str) === false
            ? self::bom() . $str
            : $str;
    }
281
282
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * Keys are cast to string before folding. If two keys become identical after
     * the case change, the value of the later one overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value
     *                         is treated as <strong>CASE_LOWER</strong></p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array a new array with the same values and lower- or uppercased keys
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // fall back to the default for unknown flags
        if ($case !== \CASE_LOWER && $case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: the values are only copied, so a by-reference loop
        // would just leave a dangling reference behind
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
313
314
    /**
     * Extracts the text located between two delimiters.
     *
     * The search for $start begins at $offset; $end is then searched for from the
     * position right after $start. An empty string is returned when either
     * delimiter is not found or when nothing lies between them.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // UTF-8: call the mbstring functions directly
            $startPos = \mb_strpos($str, $start, $offset);
            if ($startPos === false) {
                return '';
            }

            $from = $startPos + (int) \mb_strlen($start);
            $to = \mb_strpos($str, $end, $from);

            return $to === false || $to === $from
                ? ''
                : (string) \mb_substr($str, $from, $to - $from);
        }

        // any other charset: normalize its name and use the class' own wrappers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        $from = $startPos + (int) self::strlen($start, $encoding);
        $to = self::strpos($str, $end, $from, $encoding);

        return $to === false || $to === $from
            ? ''
            : (string) self::substr($str, $from, $to - $from, $encoding);
    }
377
378
    /**
     * Converts a string of binary digits ("0" / "1") into the bytes it represents.
     *
     * The digits are read as one binary number and converted to hexadecimal via
     * base_convert(); leading zero digits are therefore not preserved, and an input
     * whose value is zero yields an empty string. Input without an element at
     * offset 0 (e.g. an empty string) also yields an empty string.
     *
     * NOTE: base_convert() may lose precision on large numbers, so very long
     *       inputs are not converted reliably.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $convert = \base_convert($bin, 2, 16);
        if ($convert === '0') {
            return '';
        }

        // pack('H*') consumes two hex digits per byte and pads a dangling digit on
        // the right ("a" => 0xA0); left-pad so that "a" becomes the byte 0x0A
        if (\strlen($convert) % 2 !== 0) {
            $convert = '0' . $convert;
        }

        return \pack('H*', $convert);
    }
398
399
    /**
     * Returns the UTF-8 byte order mark (the three bytes EF BB BF).
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        return "\xef\xbb\xbf";
    }
410
411
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged and returns its result.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback <p>The callback passed on to UTF8::chr_map().</p>
     * @param string       $str      <p>The string passed on to UTF8::chr_map().</p>
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
425
426
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * The index is handed to the substring function as-is, so it is not
     * range-checked here.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
443
444
    /**
     * Splits the string into its characters; delegates to UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $chars = self::str_split($str);

        return $chars;
    }
455
456
    /**
     * Probes the server environment once and records the findings in self::$SUPPORT.
     *
     * The first call sets a marker in self::$SUPPORT; later calls see the marker
     * and return immediately.
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // already probed: nothing to do
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();
        // stays an empty list unless intl and transliterator_list_ids() are both available
        self::$SUPPORT['intl__transliterator_list_ids'] = [];
        if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids') === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // with the polyfill in use, mb_internal_encoding() is set to UTF-8 here as well
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }
    }
515
516
    /**
     * Generates a character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * The character is built as UTF-8 (lookup table for code points up to 127,
     * IntlChar when available, otherwise manual bit arithmetic) and then converted
     * via UTF8::encode() if another target encoding was requested. Results are
     * memoized per "code point + encoding" in a function-static cache.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the encoding does not change anymore from here on
        $transcode = $encoding !== 'UTF-8';

        if (
            $transcode
            && $encoding !== 'ISO-8859-1'
            && $encoding !== 'WINDOWS-1252'
            && self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 bytes by hand
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            $code_point = (int) $code_point;
            if ($code_point <= 0x7F) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                // two bytes: 110xxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        if ($transcode) {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
628
629
    /**
     * Runs a callback on every character of a string and collects the results.
     *
     * The string is split with UTF8::str_split() and the pieces are passed to
     * array_map() together with the callback.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
644
645
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character, empty for an empty string
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // pick the byte counter first, then split: with mbstring function overloading
        // the bytes are counted via mb_strlen() in the 8-bit "CP850" charset
        $byteLength = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
            : '\strlen';

        return \array_map($byteLength, self::str_split($str));
    }
675
676
    /**
     * Get a decimal code representation of a specific character.
     *
     * The lead byte determines how many bytes the sequence has (1 to 4); the
     * payload bits of the lead byte and of the following continuation bytes are
     * then combined into one integer. Only the first character of $char is decoded.
     *
     * Empty or truncated input is read without touching non-existent string
     * offsets: a missing byte is passed to UTF8::ord() as an empty string.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        // "?? ''" avoids the "Uninitialized string offset" notice/warning for an empty string
        $code = self::ord($char[0] ?? '');
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx (the sequence may be shorter than its lead byte announces)
            $code = ($code << 6) + (self::ord($char[$i - 1] ?? '') & ~0x80);
        }

        return $code;
    }
714
715
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty string is returned for an empty input. The literal "&#0;" is
     * replaced by an empty string before it is handed to UTF8::ord().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional] <p>Prefix passed on to UTF8::int_to_hex().</p>
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $input = $char === '&#0;' ? '' : $char;
        $codePoint = self::ord((string) $input);

        return self::int_to_hex($codePoint, $pfix);
    }
735
736
    /**
     * Alias of UTF8::chr_to_decimal(): forwards the argument and returns its result.
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
749
750
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * The chunks come from UTF8::str_split() and are glued together with $end,
     * so $end appears between chunks but not after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
763
764
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * Bytes that are not part of a well-formed 1- to 4-byte sequence are dropped
     * first; the optional steps then run in this fixed order: diamond question
     * mark, invisible characters, whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 captures runs of well-formed sequences, groups 2 and 3 match a
        // single stray byte; replacing every match with "$1" keeps only group 1
        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
831
832
    /**
     * Clean-up a string so that only printable UTF-8 chars remain + fix UTF-8 encoding.
     *
     * The input is cast to string, run through UTF8::fix_simple_utf8() and then
     * through UTF8::clean() with every option enabled except the MS Word
     * normalization. An empty input is returned as an empty string right away.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        return self::clean(
            self::fix_simple_utf8($input), // fixed ISO <-> UTF-8 Errors
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // leave MS Word chars as they are
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
866
867
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is split into characters with UTF8::str_split() first; every
     * element is then mapped through UTF8::ord() and, for $u_style, additionally
     * through UTF8::int_to_hex().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        $codePoints = \array_map([self::class, 'ord'], $chars);
        if (\count($codePoints) === 0) {
            return [];
        }

        if ($u_style !== true) {
            return $codePoints;
        }

        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
911
912
    /**
     * Trim the string and squeeze every run of whitespace into one single space.
     *
     * Whitespace is matched with the POSIX class "[[:space:]]"; according to the
     * original documentation this covers tabs and newlines as well as multibyte
     * whitespace such as the thin space and the ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the trimmed $str with condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $whitespaceRun = '[[:space:]]+';

        // without mbstring fall back to the library's own regex helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, $whitespaceRun, ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = \mb_ereg_replace($whitespaceRun, ' ', $str);

        // mb_ereg_replace() may return a non-string on failure, hence the cast
        return \trim((string) $collapsed);
    }
930
931
    /**
     * Count how often each character occurs in a string.
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Forwarded unchanged to UTF8::str_split()
     *                                   (presumably toggles the usage of the "mb_" functions there;
     *                                   TODO confirm against UTF8::str_split()).</p>
     *
     * @return int[] an associative array with the characters as keys and
     *               their number of occurrences as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // split into chunks of exactly one character ...
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        // ... and tally identical chunks
        return \array_count_values($characters);
    }
955
956
    /**
     * Strip "@media ... { ... }" blocks (css media-queries) from a css string.
     *
     * The pattern is applied case-insensitive, multi-line, with "." matching
     * newlines and with ungreedy quantifiers (modifiers "misU").
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string
     *                <p>The input without the matched media-queries; an empty string
     *                if preg_replace() reports an error (null is cast to string).</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

        $withoutMediaQueries = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $withoutMediaQueries;
    }
971
972
    /**
     * Tell whether the "ctype" extension is loaded in the running PHP process.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
982
983
    /**
     * Convert an int-value (decimal code point) into the matching UTF-8 character.
     *
     * The value is wrapped into a numeric html-entity ("&#<int>;") which is then
     * decoded via UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal value, it is concatenated into the entity as a string.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numericEntity = '&#' . $int . ';';
        $decodeFlags = \ENT_QUOTES | \ENT_HTML5;

        return self::html_entity_decode($numericEntity, $decodeFlags);
    }
994
995
    /**
     * Decode a MIME header field.
     *
     * With iconv support the work is done by iconv_mime_decode() (continuing on
     * errors); otherwise mb_decode_mimeheader() is used, after re-encoding the
     * input via UTF8::encode() when the requested charset is not UTF-8.
     *
     * @param string $str      <p>The encoded header.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // the two well-known names are used as they are, everything else gets normalized
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] !== true) {
            $header = $encoding !== 'UTF-8' ? self::encode($encoding, $str) : $str;

            return \mb_decode_mimeheader($header);
        }

        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1021
1022
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so it can be called on a string that is already UTF-8 without messing it up.
     *
     * Besides real charsets the pseudo-encodings "JSON", "BASE64" and "HTML-ENTITIES" are handled,
     * both as target ($toEncoding) and as source ($fromEncoding).
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the source encoding is always detected via
     *                                       UTF8::str_detect_encoding() and a detected value replaces
     *                                       $fromEncoding; if nothing is detected the string is passed to
     *                                       UTF8::to_utf8() as fallback.<br>If false, detection only runs when
     *                                       $fromEncoding is empty.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       An empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the target is "JSON" and UTF8::json_encode() returns false
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        // nothing to convert or no target given: return the input untouched
        if ($toEncoding === '' || $str === '') {
            return $str;
        }

        // the two well-known names are used as they are, everything else gets normalized
        if (!\in_array($toEncoding, ['UTF-8', 'CP850'], true)) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && !\in_array($fromEncoding, ['UTF-8', 'CP850'], true)) {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // an explicitly given source that equals the target needs no work
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // pseudo-encodings: the order matters, a decoded "from" value feeds the next "to" step
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the source encoding if requested or if it is (still / again) unknown
        $detectedEncoding = ($autodetectFromEncoding || !$fromEncoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $detectedEncoding, $str, "\n\n");

        if ($detectedEncoding !== false) {
            $fromEncoding = $detectedEncoding;
        } elseif ($autodetectFromEncoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // shortcuts handled by the library itself
        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // any other target is expected to be converted by mbstring, warn if that is missing
        if (
            !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            // a falsy result falls through to iconv
            if ($converted) {
                return $converted;
            }
        }

        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        // last resort: return the (possibly pre-decoded) input as it is
        return $viaIconv !== false ? $viaIconv : $str;
    }
1180
1181
    /**
     * Encode a string as MIME header field value via iconv_mime_encode().
     *
     * iconv_mime_encode() is called with an empty field name.
     *
     * @param string $str              <p>The value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
     * @param int    $indent           [optional] <p>Set the max length indent (passed as "line-length").</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // the two well-known names are used as they are, everything else gets normalized
        $wellKnown = ['UTF-8', 'CP850'];

        if (!\in_array($fromCharset, $wellKnown, true)) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (!\in_array($toCharset, $wellKnown, true)) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1221
1222
    /**
     * Create an extract from a text; if the search-string is found, the extract tries to center around it.
     *
     * Cut positions are looked up as the next / previous space or period. Note that the
     * positions are combined with min() / max() on the raw strpos-results, so a
     * "not found" (false) for either character turns the min()-based cut position into 0.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string (case-insensitive lookup).</p>
     * @param int|null $length                 [optional] <p>Default: null === half of the string length (rounded)</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 uses the native "mb_" functions directly, every other charset the class helpers
        $isUtf8 = $encoding === 'UTF-8';

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // --- no search-string: just cut the beginning of the text -----------------------------
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $stringLength = $isUtf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $stringLength);
            }

            $pos = $isUtf8
                ? (int) \min(
                    \mb_strpos($str, ' ', $end),
                    \mb_strpos($str, '.', $end)
                )
                : (int) \min(
                    self::strpos($str, ' ', $end, $encoding),
                    self::strpos($str, '.', $end, $encoding)
                );

            if (!$pos) {
                return $str;
            }

            $strSub = $isUtf8
                ? \mb_substr($str, 0, $pos)
                : self::substr($str, 0, $pos, $encoding);

            if ($strSub === false) {
                return '';
            }

            return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // --- with search-string: try to center the extract around the first hit ----------------
        $wordPos = (int) ($isUtf8
            ? \mb_stripos($str, $search)
            : self::stripos($str, $search, 0, $encoding));
        $searchLength = (int) ($isUtf8
            ? \mb_strlen($search)
            : self::strlen($search, $encoding));
        $halfSide = (int) ($wordPos - $length / 2 + $searchLength / 2);

        // start at the last space / period in front of the centered window
        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = $isUtf8
                ? \mb_substr($str, 0, $halfSide)
                : self::substr($str, 0, $halfSide, $encoding);

            if ($halfText !== false) {
                $pos_start = $isUtf8
                    ? (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    )
                    : (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
            }
        }

        if ($wordPos && $halfSide > 0) {
            // the hit is far enough inside the text: skip the beginning
            $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));

            $pos_end = $isUtf8
                ? (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                ) - $pos_start
                : (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                ) - $pos_start;

            if ($pos_end <= 0) {
                // no usable end position: take everything from the start position on
                $strSub = $isUtf8
                    ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                    : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

                return $strSub !== false
                    ? $replacerForSkippedText . \ltrim($strSub, $trimChars)
                    : '';
            }

            $strSub = $isUtf8
                ? \mb_substr($str, $pos_start, $pos_end)
                : self::substr($str, $pos_start, $pos_end, $encoding);

            return $strSub !== false
                ? $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText
                : '';
        }

        // the hit is at (or near) the beginning, or there is no hit: cut only the end
        $offset = \min($length - 1, (int) self::strlen($str, $encoding));

        $pos_end = $isUtf8
            ? (int) \min(
                \mb_strpos($str, ' ', $offset),
                \mb_strpos($str, '.', $offset)
            )
            : (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );

        if (!$pos_end) {
            return $str;
        }

        $strSub = $isUtf8
            ? \mb_substr($str, 0, $pos_end)
            : self::substr($str, 0, $pos_end, $encoding);

        return $strSub !== false
            ? \rtrim($strSub, $trimChars) . $replacerForSkippedText
            : '';
    }
1409
1410
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. The value is passed through
     *                                        filter_var(..., FILTER_SANITIZE_STRING) before it is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed through to the native file_get_contents() to
     *                                        trigger the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is non-zero,
     *                                        a context with the "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout (only used if no $context is given).</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // init
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and it alters
        // file names that contain tags or quotes - confirm whether this is still wanted.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);

        // filter_var() reports a failed filter with false; with strict_types a non-string
        // must not reach the native file_get_contents(), so treat it as a read failure
        if ($filename === false) {
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // only pass the length argument if it was really given
        if (\is_int($maxLength) === true) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 === true) {
            // binary data that is neither UTF-16 nor UTF-32 is returned untouched
            $isPlainBinary = self::is_binary($data, true) === true
                             &&
                             self::is_utf16($data, false) === false
                             &&
                             self::is_utf32($data, false) === false;

            if ($isPlainBinary === false) {
                $data = self::encode('UTF-8', $data, false, $fromEncoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1503
1504
    /**
     * Check whether the content of a file starts with a BOM (Byte Order Mark).
     *
     * The file is read completely with the native file_get_contents() and the
     * content is handed to UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1523
1524
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively, strings are normalized, every
     * other type is returned unchanged.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>One of the \Normalizer form constants.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which starts
     *                                   with a combining mark (\p{Mn}).</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        // containers: filter every element / property in place
        if ($type === 'array' || $type === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // non-empty marker, so that the isset()-check below passes for normalized input
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // an empty / failed normalization result: try to repair the encoding instead
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
                                   &&
                                   isset($normalized[0], $leading_combining[0])
                                   &&
                                   \preg_match('/^\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1588
1589
    /**
     * "filter_input()"-wrapper which passes the result through UTF8::filter()
     * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
     *
     * Gets a specific external variable by name and optionally filters it
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              It is only forwarded if the caller really passed a 4th argument.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // forward $options only when it was given, so the native default stays in effect otherwise
        $value = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($value);
    }
1631
1632
    /**
1633
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1634
     *
1635
     * Gets external variables and optionally filters them
1636
     *
1637
     * @see  http://php.net/manual/en/function.filter-input-array.php
1638
     *
1639
     * @param int   $type       <p>
1640
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1641
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1642
     *                          <b>INPUT_ENV</b>.
1643
     *                          </p>
1644
     * @param mixed $definition [optional] <p>
1645
     *                          An array defining the arguments. A valid key is a string
1646
     *                          containing a variable name and a valid value is either a filter type, or an array
1647
     *                          optionally specifying the filter, flags and options. If the value is an
1648
     *                          array, valid keys are filter which specifies the
1649
     *                          filter type,
1650
     *                          flags which specifies any flags that apply to the
1651
     *                          filter, and options which specifies any options that
1652
     *                          apply to the filter. See the example below for a better understanding.
1653
     *                          </p>
1654
     *                          <p>
1655
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1656
     *                          input array are filtered by this filter.
1657
     *                          </p>
1658
     * @param bool  $add_empty  [optional] <p>
1659
     *                          Add missing keys as <b>NULL</b> to the return value.
1660
     *                          </p>
1661
     *
1662
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1663
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1664
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1665
     *               is not set and <b>NULL</b> if the filter fails.
1666
     */
1667
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // Forward the optional arguments only when the caller supplied at least
        // a definition; otherwise call the native function with $type alone.
        $filtered = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($filtered);
    }
1677
1678
    /**
1679
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1680
     *
1681
     * Filters a variable with a specified filter
1682
     *
1683
     * @see  http://php.net/manual/en/function.filter-var.php
1684
     *
1685
     * @param mixed $variable <p>
1686
     *                        Value to filter.
1687
     *                        </p>
1688
     * @param int   $filter   [optional] <p>
1689
     *                        The ID of the filter to apply. The
1690
     *                        manual page lists the available filters.
1691
     *                        </p>
1692
     * @param mixed $options  [optional] <p>
1693
     *                        Associative array of options or bitwise disjunction of flags. If filter
1694
     *                        accepts options, flags can be provided in "flags" field of array. For
1695
     *                        the "callback" filter, callable type should be passed. The
1696
     *                        callback must accept one argument, the value to be filtered, and return
1697
     *                        the value after filtering/sanitizing it.
1698
     *                        </p>
1699
     *                        <p>
1700
     *                        <code>
1701
     *                        // for filters that accept options, use this format
1702
     *                        $options = array(
1703
     *                        'options' => array(
1704
     *                        'default' => 3, // value to return if the filter fails
1705
     *                        // other options here
1706
     *                        'min_range' => 0
1707
     *                        ),
1708
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1709
     *                        );
1710
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1711
     *                        // for filter that only accept flags, you can pass them directly
1712
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1713
     *                        // for filter that only accept flags, you can also pass as an array
1714
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1715
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1716
     *                        // callback validate filter
1717
     *                        function foo($value)
1718
     *                        {
1719
     *                        // Expected format: Surname, GivenNames
1720
     *                        if (strpos($value, ", ") === false) return false;
1721
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1722
     *                        $empty = (empty($surname) || empty($givennames));
1723
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1724
     *                        if ($empty || $notstrings) {
1725
     *                        return false;
1726
     *                        } else {
1727
     *                        return $value;
1728
     *                        }
1729
     *                        }
1730
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1731
     *                        </code>
1732
     *                        </p>
1733
     *
1734
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1735
     */
1736 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // $options is only handed to the native function when the caller
        // explicitly passed a third argument.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1746
1747
    /**
1748
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1749
     *
1750
     * Gets multiple variables and optionally filters them
1751
     *
1752
     * @see  http://php.net/manual/en/function.filter-var-array.php
1753
     *
1754
     * @param array $data       <p>
1755
     *                          An array with string keys containing the data to filter.
1756
     *                          </p>
1757
     * @param mixed $definition [optional] <p>
1758
     *                          An array defining the arguments. A valid key is a string
1759
     *                          containing a variable name and a valid value is either a
1760
     *                          filter type, or an
1761
     *                          array optionally specifying the filter, flags and options.
1762
     *                          If the value is an array, valid keys are filter
1763
     *                          which specifies the filter type,
1764
     *                          flags which specifies any flags that apply to the
1765
     *                          filter, and options which specifies any options that
1766
     *                          apply to the filter. See the example below for a better understanding.
1767
     *                          </p>
1768
     *                          <p>
1769
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1770
     *                          input array are filtered by this filter.
1771
     *                          </p>
1772
     * @param bool  $add_empty  [optional] <p>
1773
     *                          Add missing keys as <b>NULL</b> to the return value.
1774
     *                          </p>
1775
     *
1776
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1777
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1778
     *               set
1779
     */
1780 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // Forward the optional arguments only when the caller supplied at least
        // a definition; otherwise call the native function with $data alone.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($filtered);
    }
1790
1791
    /**
     * Reports whether the "finfo" class exists in the running PHP process.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoClass = 'finfo';

        return \class_exists($finfoClass);
    }
1801
1802
    /**
     * Returns the leading $n characters of a string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        $nothingToReturn = $str === '' || $n <= 0;
        if ($nothingToReturn) {
            return '';
        }

        // "UTF-8" goes straight to mb_substr(); any other encoding name is
        // delegated to self::substr() together with that encoding.
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1823
1824
    /**
     * Tells whether a string is at most $box_size characters long.
     *
     * The length is measured with self::strlen().
     *
     * @param string $str      the string to measure
     * @param int    $box_size the maximum number of characters allowed
     *
     * @return bool true if the measured length is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1836
1837
    /**
     * Repairs simple cases of broken UTF-8 by plain string replacement.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * Use this if you received an UTF-8 string that was converted from Windows-1252 as if it
     * were ISO-8859-1 (ignoring Windows-1252 chars from 80 to 9F).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement table is loaded via self::getData('utf8_fix') on first use; its
     * keys and values are then cached in static locals for later calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        static $searchCache = null;
        static $replaceCache = null;

        if ($str === '') {
            return '';
        }

        if ($searchCache === null) {
            // Load the shared table only if no earlier caller has done so.
            $fixMap = self::$BROKEN_UTF8_FIX;
            if ($fixMap === null) {
                $fixMap = self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $searchCache = \array_keys($fixMap);
            $replaceCache = \array_values($fixMap);
        }

        return \str_replace($searchCache, $replaceCache, $str);
    }
1870
1871
    /**
     * Repairs a string that was UTF-8 encoded twice (or more often).
     *
     * Arrays are processed element by element (recursively). A scalar input is
     * cast to string and run through utf8_decode() + to_utf8() repeatedly until
     * a pass no longer changes it.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) !== true) {
            $current = (string) $str;
            $previous = '';

            // Stop as soon as one pass leaves the string untouched.
            while ($previous !== $current) {
                $previous = $current;
                /**
                 * @psalm-suppress PossiblyInvalidArgument
                 */
                $current = self::to_utf8(
                    self::utf8_decode($current, true)
                );
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $current;
        }

        foreach ($str as &$item) {
            $item = self::fix_utf8($item);
        }
        // Drop the reference so later writes cannot hit the last element.
        unset($item);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
1913
1914
    /**
     * Get the text direction of a specific character.
     *
     * When "intlChar" support is flagged, \IntlChar::charDirection() is asked
     * first; if its answer is in neither direction list, or without that support,
     * the code point is checked against hard-coded right-to-left code points
     * and ranges.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $c = static::chr_to_decimal($char);

        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // True when the code point is one of $singles (strict comparison)
        // or lies inside one of the inclusive [low, high] $ranges.
        $isListed = static function ($codePoint, array $singles, array $ranges): bool {
            if (\in_array($codePoint, $singles, true)) {
                return true;
            }

            foreach ($ranges as list($low, $high)) {
                if ($codePoint >= $low && $codePoint <= $high) {
                    return true;
                }
            }

            return false;
        };

        if ($c <= 0x85e) {
            $singles = [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ];
            $ranges = [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ];

            if ($isListed($c, $singles, $ranges)) {
                return 'RTL';
            }

            return 'LTR';
        }

        if ($c === 0x200f) {
            return 'RTL';
        }

        if ($c >= 0xfb1d) {
            $singles = [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ];
            $ranges = [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];

            if ($isListed($c, $singles, $ranges)) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2027
2028
    /**
     * Look up the detected php-support flags.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is set (and not null) in that array<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        return $key === null
            ? self::$SUPPORT
            : (self::$SUPPORT[$key] ?? null);
    }
2050
2051
    /**
     * Guess a file type from the first two bytes of its content.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * The decimal values of the first two bytes are concatenated and cast to int
     * (e.g. "%P" => 37 and 80 => 3780), then looked up in a fixed table.
     *
     * @param string $str
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'; $fallback is returned
     *               when the input is shorter than two bytes or the signature is unknown
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        $magic = \substr($str, 0, 2);
        if ($magic === false || \strlen($magic) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $magic);
        $signature = (int) ($bytes['chars1'] . $bytes['chars2']);

        // signature => [extension, mime-type]
        $knownSignatures = [
            3780   => ['pdf', 'application/pdf'],
            7790   => ['exe', 'application/octet-stream'],
            7784   => ['midi', 'audio/x-midi'],
            8075   => ['zip', 'application/zip'],
            8297   => ['rar', 'application/rar'],
            255216 => ['jpg', 'image/jpeg'],
            7173   => ['gif', 'image/gif'],
            6677   => ['bmp', 'image/bmp'],
            13780  => ['png', 'image/png'],
        ];

        if (!isset($knownSignatures[$signature])) {
            return $fallback;
        }

        list($ext, $mime) = $knownSignatures[$signature];

        return [
            'ext'  => $ext,
            'mime' => $mime,
            'type' => 'binary',
        ];
    }
2149
2150
    /**
     * Build a string of $length characters, each drawn at random from $possibleChars.
     *
     * An empty character pool or a non-positive $length yields an empty string.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // The exact name "UTF-8" uses the plain mb_* functions; any other name is
        // normalized first and then handled by the class' own strlen()/substr().
        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $poolSize = (int) \mb_strlen($possibleChars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $poolSize = (int) self::strlen($possibleChars, $encoding);
        }

        if ($poolSize === 0) {
            return '';
        }

        $result = '';
        $picked = 0;

        while ($picked < $length) {
            try {
                $index = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $poolSize - 1);
            }

            $char = $isUtf8
                ? \mb_substr($possibleChars, $index, 1)
                : self::substr($possibleChars, $index, 1, $encoding);

            // A failed extraction (false) is skipped and does not count.
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2211
2212
    /**
     * Create a unique identifier string.
     *
     * A seed is built from a random integer, the session id, the REMOTE_ADDR and
     * SERVER_ADDR entries of $_SERVER (if present) and $entropyExtra; that seed is
     * used as the prefix for uniqid().
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        $seedParts = [
            \random_int(0, \mt_getrandmax()),
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $entropyExtra,
        ];
        $seed = \implode('', $seedParts);

        $unique = \uniqid($seed, true);

        return $md5 ? \md5($unique . $seed) : $unique;
    }
2234
2235
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * Forwards $str unchanged and returns whatever "UTF8::string_has_bom()" returns.
     *
     * @see        UTF8::string_has_bom()
     *
     * @param string $str
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $bomFound = self::string_has_bom($str);

        return $bomFound;
    }
2250
2251
    /**
     * Tells whether the string contains at least one lower case character.
     *
     * The pattern is matched with mb_ereg_match() when "mbstring" support is
     * flagged, otherwise with self::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2267
2268
    /**
     * Tells whether the string contains at least one upper case character.
     *
     * The pattern is matched with mb_ereg_match() when "mbstring" support is
     * flagged, otherwise with self::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2284
2285
    /**
     * Turns a hexadecimal value into the matching UTF-8 character.
     *
     * The value is converted with hexdec() and handed to self::decimal_to_chr().
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2296
2297
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with
     * "\u" or "U+" (case-insensitive), e.g. "U+00f1", "\u00e4" or "1F600".
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hex digits (0-9, a-f) are accepted. The previous character
        // class "[a-z0-9]" also matched letters such as "z", which intval() then
        // silently turned into 0 instead of reporting a failure.
        if (\preg_match('/^(?:\\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2321
2322
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * All three arguments are forwarded unchanged.
     *
     * @see UTF8::html_entity_decode()
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2337
2338
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * With "mbstring" support the work is done by mb_encode_numericentity();
     * otherwise every character is encoded via self::single_chr_html_encode().
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Start at 0x80 to leave ASCII untouched, at 0x00 to encode everything.
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A convmap entry is exactly four integers: [start, end, offset, mask].
            // The former fifth element made the array length a non-multiple of 4,
            // which PHP 8 rejects with a ValueError.
            $convMap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convMap);
            }

            return \mb_encode_numericentity($str, $convMap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2394
2395
    /**
2396
     * UTF-8 version of html_entity_decode()
2397
     *
2398
     * The reason we are not using html_entity_decode() by itself is because
2399
     * while it is not technically correct to leave out the semicolon
2400
     * at the end of an entity most browsers will still interpret the entity
2401
     * correctly. html_entity_decode() does not convert entities without
2402
     * semicolons, so we are left with our own little solution here. Bummer.
2403
     *
2404
     * Convert all HTML entities to their applicable characters
2405
     *
2406
     * INFO: opposite to UTF8::html_encode()
2407
     *
2408
     * @see http://php.net/manual/en/function.html-entity-decode.php
2409
     *
2410
     * @param string $str      <p>
2411
     *                         The input string.
2412
     *                         </p>
2413
     * @param int    $flags    [optional] <p>
2414
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2415
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2416
     *                         <table>
2417
     *                         Available <i>flags</i> constants
2418
     *                         <tr valign="top">
2419
     *                         <td>Constant Name</td>
2420
     *                         <td>Description</td>
2421
     *                         </tr>
2422
     *                         <tr valign="top">
2423
     *                         <td><b>ENT_COMPAT</b></td>
2424
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2425
     *                         </tr>
2426
     *                         <tr valign="top">
2427
     *                         <td><b>ENT_QUOTES</b></td>
2428
     *                         <td>Will convert both double and single quotes.</td>
2429
     *                         </tr>
2430
     *                         <tr valign="top">
2431
     *                         <td><b>ENT_NOQUOTES</b></td>
2432
     *                         <td>Will leave both double and single quotes unconverted.</td>
2433
     *                         </tr>
2434
     *                         <tr valign="top">
2435
     *                         <td><b>ENT_HTML401</b></td>
2436
     *                         <td>
2437
     *                         Handle code as HTML 4.01.
2438
     *                         </td>
2439
     *                         </tr>
2440
     *                         <tr valign="top">
2441
     *                         <td><b>ENT_XML1</b></td>
2442
     *                         <td>
2443
     *                         Handle code as XML 1.
2444
     *                         </td>
2445
     *                         </tr>
2446
     *                         <tr valign="top">
2447
     *                         <td><b>ENT_XHTML</b></td>
2448
     *                         <td>
2449
     *                         Handle code as XHTML.
2450
     *                         </td>
2451
     *                         </tr>
2452
     *                         <tr valign="top">
2453
     *                         <td><b>ENT_HTML5</b></td>
2454
     *                         <td>
2455
     *                         Handle code as HTML 5.
2456
     *                         </td>
2457
     *                         </tr>
2458
     *                         </table>
2459
     *                         </p>
2460
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2461
     *
2462
     * @return string the decoded string
2463
     */
2464 42
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Fast path: strings shorter than four bytes, or without any "&",
        // are returned untouched.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // No flags given: decode both quote styles using the HTML5 entity table.
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // A convmap is made of groups of exactly four integers
        // (start, end, offset, mask). The previous five-element map carried a
        // stray trailing 0, which PHP 8 rejects with a ValueError.
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        // Keep decoding until a full pass leaves the string unchanged, so that
        // multiply-encoded input (e.g. "&amp;lt;") is resolved completely.
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap
                    );
                } else {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap,
                        $encoding
                    );
                }
            } else {
                // NOTE(review): this fallback still calls mb_convert_encoding();
                // presumably a polyfill provides it when ext-mbstring is missing - confirm.
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // Quotes are left encoded here; the native call below
                        // decides about them according to $flags.
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (appends the missing ";" to unterminated numeric entities)
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2551
2552
    /**
2553
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2554
     *
2555
     * @param string $str
2556
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2557
     *
2558
     * @return string
2559
     */
2560 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Escape both quote styles; ENT_SUBSTITUTE replaces invalid code unit
        // sequences instead of yielding an empty string.
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2568
2569
    /**
2570
     * Remove empty html-tag.
2571
     *
2572
     * e.g.: <tag></tag>
2573
     *
2574
     * @param string $str
2575
     *
2576
     * @return string
2577
     */
2578 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // Opening tag, optional whitespace only, then any closing tag.
        $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        // preg_replace() yields null on error; the cast turns that into ''.
        return (string) $stripped;
    }
2586
2587
    /**
2588
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2589
     *
2590
     * @see http://php.net/manual/en/function.htmlentities.php
2591
     *
2592
     * @param string $str           <p>
2593
     *                              The input string.
2594
     *                              </p>
2595
     * @param int    $flags         [optional] <p>
2596
     *                              A bitmask of one or more of the following flags, which specify how to handle
2597
     *                              quotes, invalid code unit sequences and the used document type. The default is
2598
     *                              ENT_COMPAT | ENT_HTML401.
2599
     *                              <table>
2600
     *                              Available <i>flags</i> constants
2601
     *                              <tr valign="top">
2602
     *                              <td>Constant Name</td>
2603
     *                              <td>Description</td>
2604
     *                              </tr>
2605
     *                              <tr valign="top">
2606
     *                              <td><b>ENT_COMPAT</b></td>
2607
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2608
     *                              </tr>
2609
     *                              <tr valign="top">
2610
     *                              <td><b>ENT_QUOTES</b></td>
2611
     *                              <td>Will convert both double and single quotes.</td>
2612
     *                              </tr>
2613
     *                              <tr valign="top">
2614
     *                              <td><b>ENT_NOQUOTES</b></td>
2615
     *                              <td>Will leave both double and single quotes unconverted.</td>
2616
     *                              </tr>
2617
     *                              <tr valign="top">
2618
     *                              <td><b>ENT_IGNORE</b></td>
2619
     *                              <td>
2620
     *                              Silently discard invalid code unit sequences instead of returning
2621
     *                              an empty string. Using this flag is discouraged as it
2622
     *                              may have security implications.
2623
     *                              </td>
2624
     *                              </tr>
2625
     *                              <tr valign="top">
2626
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2627
     *                              <td>
2628
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2629
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2630
     *                              string.
2631
     *                              </td>
2632
     *                              </tr>
2633
     *                              <tr valign="top">
2634
     *                              <td><b>ENT_DISALLOWED</b></td>
2635
     *                              <td>
2636
     *                              Replace invalid code points for the given document type with a
2637
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2638
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2639
     *                              instance, to ensure the well-formedness of XML documents with
2640
     *                              embedded external content.
2641
     *                              </td>
2642
     *                              </tr>
2643
     *                              <tr valign="top">
2644
     *                              <td><b>ENT_HTML401</b></td>
2645
     *                              <td>
2646
     *                              Handle code as HTML 4.01.
2647
     *                              </td>
2648
     *                              </tr>
2649
     *                              <tr valign="top">
2650
     *                              <td><b>ENT_XML1</b></td>
2651
     *                              <td>
2652
     *                              Handle code as XML 1.
2653
     *                              </td>
2654
     *                              </tr>
2655
     *                              <tr valign="top">
2656
     *                              <td><b>ENT_XHTML</b></td>
2657
     *                              <td>
2658
     *                              Handle code as XHTML.
2659
     *                              </td>
2660
     *                              </tr>
2661
     *                              <tr valign="top">
2662
     *                              <td><b>ENT_HTML5</b></td>
2663
     *                              <td>
2664
     *                              Handle code as HTML 5.
2665
     *                              </td>
2666
     *                              </tr>
2667
     *                              </table>
2668
     *                              </p>
2669
     * @param string $encoding      [optional] <p>
2670
     *                              Like <b>htmlspecialchars</b>,
2671
     *                              <b>htmlentities</b> takes an optional third argument
2672
     *                              <i>encoding</i> which defines encoding used in
2673
     *                              conversion.
2674
     *                              Although this argument is technically optional, you are highly
2675
     *                              encouraged to specify the correct value for your code.
2676
     *                              </p>
2677
     * @param bool   $double_encode [optional] <p>
2678
     *                              When <i>double_encode</i> is turned off PHP will not
2679
     *                              encode existing html entities. The default is to convert everything.
2680
     *                              </p>
2681
     *
2682
     * @return string
2683
     *                <p>
2684
     *                The encoded string.
2685
     *                <br><br>
2686
     *                If the input <i>string</i> contains an invalid code unit
2687
     *                sequence within the given <i>encoding</i> an empty string
2688
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2689
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2690
     *                </p>
2691
     */
2692 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // 'UTF-8' and 'CP850' are passed through as-is; any other name is normalized first.
        $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * The native function does not turn a backslash into an HTML entity,
         * so every backslash is replaced by its numeric entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
2716
2717
    /**
2718
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2719
     *
2720
     * INFO: Take a look at "UTF8::htmlentities()"
2721
     *
2722
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2723
     *
2724
     * @param string $str           <p>
2725
     *                              The string being converted.
2726
     *                              </p>
2727
     * @param int    $flags         [optional] <p>
2728
     *                              A bitmask of one or more of the following flags, which specify how to handle
2729
     *                              quotes, invalid code unit sequences and the used document type. The default is
2730
     *                              ENT_COMPAT | ENT_HTML401.
2731
     *                              <table>
2732
     *                              Available <i>flags</i> constants
2733
     *                              <tr valign="top">
2734
     *                              <td>Constant Name</td>
2735
     *                              <td>Description</td>
2736
     *                              </tr>
2737
     *                              <tr valign="top">
2738
     *                              <td><b>ENT_COMPAT</b></td>
2739
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2740
     *                              </tr>
2741
     *                              <tr valign="top">
2742
     *                              <td><b>ENT_QUOTES</b></td>
2743
     *                              <td>Will convert both double and single quotes.</td>
2744
     *                              </tr>
2745
     *                              <tr valign="top">
2746
     *                              <td><b>ENT_NOQUOTES</b></td>
2747
     *                              <td>Will leave both double and single quotes unconverted.</td>
2748
     *                              </tr>
2749
     *                              <tr valign="top">
2750
     *                              <td><b>ENT_IGNORE</b></td>
2751
     *                              <td>
2752
     *                              Silently discard invalid code unit sequences instead of returning
2753
     *                              an empty string. Using this flag is discouraged as it
2754
     *                              may have security implications.
2755
     *                              </td>
2756
     *                              </tr>
2757
     *                              <tr valign="top">
2758
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2759
     *                              <td>
2760
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2761
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2762
     *                              string.
2763
     *                              </td>
2764
     *                              </tr>
2765
     *                              <tr valign="top">
2766
     *                              <td><b>ENT_DISALLOWED</b></td>
2767
     *                              <td>
2768
     *                              Replace invalid code points for the given document type with a
2769
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2770
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2771
     *                              instance, to ensure the well-formedness of XML documents with
2772
     *                              embedded external content.
2773
     *                              </td>
2774
     *                              </tr>
2775
     *                              <tr valign="top">
2776
     *                              <td><b>ENT_HTML401</b></td>
2777
     *                              <td>
2778
     *                              Handle code as HTML 4.01.
2779
     *                              </td>
2780
     *                              </tr>
2781
     *                              <tr valign="top">
2782
     *                              <td><b>ENT_XML1</b></td>
2783
     *                              <td>
2784
     *                              Handle code as XML 1.
2785
     *                              </td>
2786
     *                              </tr>
2787
     *                              <tr valign="top">
2788
     *                              <td><b>ENT_XHTML</b></td>
2789
     *                              <td>
2790
     *                              Handle code as XHTML.
2791
     *                              </td>
2792
     *                              </tr>
2793
     *                              <tr valign="top">
2794
     *                              <td><b>ENT_HTML5</b></td>
2795
     *                              <td>
2796
     *                              Handle code as HTML 5.
2797
     *                              </td>
2798
     *                              </tr>
2799
     *                              </table>
2800
     *                              </p>
2801
     * @param string $encoding      [optional] <p>
2802
     *                              Defines encoding used in conversion.
2803
     *                              </p>
2804
     *                              <p>
2805
     *                              For the purposes of this function, the encodings
2806
     *                              ISO-8859-1, ISO-8859-15,
2807
     *                              UTF-8, cp866,
2808
     *                              cp1251, cp1252, and
2809
     *                              KOI8-R are effectively equivalent, provided the
2810
     *                              <i>string</i> itself is valid for the encoding, as
2811
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2812
     *                              the same positions in all of these encodings.
2813
     *                              </p>
2814
     * @param bool   $double_encode [optional] <p>
2815
     *                              When <i>double_encode</i> is turned off PHP will not
2816
     *                              encode existing html entities, the default is to convert everything.
2817
     *                              </p>
2818
     *
2819
     * @return string the converted string.
2820
     *                </p>
2821
     *                <p>
2822
     *                If the input <i>string</i> contains an invalid code unit
2823
     *                sequence within the given <i>encoding</i> an empty string
2824
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2825
     *                <b>ENT_SUBSTITUTE</b> flags are set
2826
     */
2827 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // 'UTF-8' and 'CP850' are passed through as-is; any other name is normalized first.
        $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2839
2840
    /**
2841
     * Checks whether iconv is available on the server.
2842
     *
2843
     * @return bool
2844
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2845
     */
2846
    public static function iconv_loaded(): bool
    {
        // Ask the engine whether the "iconv" extension is loaded.
        $hasIconv = \extension_loaded('iconv');

        return $hasIconv;
    }
2850
2851
    /**
2852
     * alias for "UTF8::decimal_to_chr()"
2853
     *
2854
     * @see UTF8::decimal_to_chr()
2855
     *
2856
     * @param mixed $int
2857
     *
2858
     * @return string
2859
     */
2860 4
    public static function int_to_chr($int): string
    {
        // Plain alias: the argument is forwarded unchanged to decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2864
2865
    /**
2866
     * Converts Integer to hexadecimal U+xxxx code point representation.
2867
     *
2868
     * INFO: opposite to UTF8::hex_to_int()
2869
     *
2870
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2871
     * @param string $pfix [optional]
2872
     *
2873
     * @return string the prefixed, lower-case hexadecimal code point, zero-padded to at least four digits
2874
     */
2875 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Left-pad with zeros to at least four hex digits; longer values stay as they are.
        $hexDigits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $hexDigits;
    }
2883
2884
    /**
2885
     * Checks whether intl-char is available on the server.
2886
     *
2887
     * @return bool
2888
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2889
     */
2890
    public static function intlChar_loaded(): bool
    {
        // Availability is detected through the presence of the IntlChar class.
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2894
2895
    /**
2896
     * Checks whether intl is available on the server.
2897
     *
2898
     * @return bool
2899
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2900
     */
2901 5
    public static function intl_loaded(): bool
    {
        // Ask the engine whether the "intl" extension is loaded.
        $hasIntl = \extension_loaded('intl');

        return $hasIntl;
    }
2905
2906
    /**
2907
     * alias for "UTF8::is_ascii()"
2908
     *
2909
     * @see        UTF8::is_ascii()
2910
     *
2911
     * @param string $str
2912
     *
2913
     * @return bool
2914
     *
2915
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2916
     */
2917 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: forwards unchanged to is_ascii().
        $result = self::is_ascii($str);

        return $result;
    }
2921
2922
    /**
2923
     * alias for "UTF8::is_base64()"
2924
     *
2925
     * @see        UTF8::is_base64()
2926
     *
2927
     * @param string $str
2928
     *
2929
     * @return bool
2930
     *
2931
     * @deprecated <p>use "UTF8::is_base64()"</p>
2932
     */
2933 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: forwards unchanged to is_base64().
        $result = self::is_base64($str);

        return $result;
    }
2937
2938
    /**
2939
     * alias for "UTF8::is_binary()"
2940
     *
2941
     * @see        UTF8::is_binary()
2942
     *
2943
     * @param mixed $str
2944
     * @param bool  $strict
2945
     *
2946
     * @return bool
2947
     *
2948
     * @deprecated <p>use "UTF8::is_binary()"</p>
2949
     */
2950 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated alias: forwards both arguments unchanged to is_binary().
        $result = self::is_binary($str, $strict);

        return $result;
    }
2954
2955
    /**
2956
     * alias for "UTF8::is_bom()"
2957
     *
2958
     * @see        UTF8::is_bom()
2959
     *
2960
     * @param string $utf8_chr
2961
     *
2962
     * @return bool
2963
     *
2964
     * @deprecated <p>use "UTF8::is_bom()"</p>
2965
     */
2966 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: forwards unchanged to is_bom().
        $result = self::is_bom($utf8_chr);

        return $result;
    }
2970
2971
    /**
2972
     * alias for "UTF8::is_html()"
2973
     *
2974
     * @see        UTF8::is_html()
2975
     *
2976
     * @param string $str
2977
     *
2978
     * @return bool
2979
     *
2980
     * @deprecated <p>use "UTF8::is_html()"</p>
2981
     */
2982 2
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: forwards unchanged to is_html().
        $result = self::is_html($str);

        return $result;
    }
2986
2987
    /**
2988
     * alias for "UTF8::is_json()"
2989
     *
2990
     * @see        UTF8::is_json()
2991
     *
2992
     * @param string $str
2993
     *
2994
     * @return bool
2995
     *
2996
     * @deprecated <p>use "UTF8::is_json()"</p>
2997
     */
2998
    public static function isJson(string $str): bool
    {
        // Deprecated alias: forwards unchanged to is_json().
        $result = self::is_json($str);

        return $result;
    }
3002
3003
    /**
3004
     * alias for "UTF8::is_utf16()"
3005
     *
3006
     * @see        UTF8::is_utf16()
3007
     *
3008
     * @param mixed $str
3009
     *
3010
     * @return false|int
3011
     *                   <strong>false</strong> if it is not UTF-16,<br>
3012
     *                   <strong>1</strong> for UTF-16LE,<br>
3013
     *                   <strong>2</strong> for UTF-16BE
3014
     *
3015
     * @deprecated <p>use "UTF8::is_utf16()"</p>
3016
     */
3017 2
    public static function isUtf16($str)
    {
        // Deprecated alias: forwards unchanged to is_utf16().
        $result = self::is_utf16($str);

        return $result;
    }
3021
3022
    /**
3023
     * alias for "UTF8::is_utf32()"
3024
     *
3025
     * @see        UTF8::is_utf32()
3026
     *
3027
     * @param mixed $str
3028
     *
3029
     * @return false|int
3030
     *                   <strong>false</strong> if it is not UTF-32,
3031
     *                   <strong>1</strong> for UTF-32LE,
3032
     *                   <strong>2</strong> for UTF-32BE
3033
     *
3034
     * @deprecated <p>use "UTF8::is_utf32()"</p>
3035
     */
3036 2
    public static function isUtf32($str)
    {
        // Deprecated alias: forwards unchanged to is_utf32().
        $result = self::is_utf32($str);

        return $result;
    }
3040
3041
    /**
3042
     * alias for "UTF8::is_utf8()"
3043
     *
3044
     * @see        UTF8::is_utf8()
3045
     *
3046
     * @param string $str
3047
     * @param bool   $strict
3048
     *
3049
     * @return bool
3050
     *
3051
     * @deprecated <p>use "UTF8::is_utf8()"</p>
3052
     */
3053 17
    public static function isUtf8($str, $strict = false): bool
    {
        // Deprecated alias: forwards both arguments unchanged to is_utf8().
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3057
3058
    /**
3059
     * Returns true if the string contains only alphabetic chars, false otherwise.
3060
     *
3061
     * @param string $str
3062
     *
3063
     * @return bool
3064
     *              Whether or not $str contains only alphabetic chars
3065
     */
3066 10
    public static function is_alpha(string $str): bool
    {
        // Zero or more alphabetic chars, anchored at both ends.
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3075
3076
    /**
3077
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3078
     *
3079
     * @param string $str
3080
     *
3081
     * @return bool
3082
     *              Whether or not $str contains only alphanumeric chars
3083
     */
3084 13
    public static function is_alphanumeric(string $str): bool
    {
        // Zero or more alphanumeric chars, anchored at both ends.
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3093
3094
    /**
3095
     * Checks if a string is 7 bit ASCII.
3096
     *
3097
     * @param string $str <p>The string to check.</p>
3098
     *
3099
     * @return bool
3100
     *              <strong>true</strong> if it is ASCII<br>
3101
     *              <strong>false</strong> otherwise
3102
     */
3103 137
    public static function is_ascii(string $str): bool
    {
        // The empty string is ASCII by definition; otherwise no byte outside
        // the accepted set may occur.
        // NOTE(review): \x10 and \x13 (DLE, DC3) are accepted while other
        // control bytes are not - this looks like decimal 10/13 written as
        // hex; confirm the intent before changing the pattern.
        return $str === ''
            || !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
3111
3112
    /**
3113
     * Returns true if the string is base64 encoded, false otherwise.
3114
     *
3115
     * @param mixed|string $str                <p>The input string.</p>
3116
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
3117
     *
3118
     * @return bool whether or not $str is base64 encoded
3119
     */
3120 16
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        // The empty string only counts when the caller explicitly allows it.
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Re-encoding must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3137
3138
    /**
3139
     * Check if the input is binary (heuristic; this looks like a hack).
3140
     *
3141
     * @param mixed $input
3142
     * @param bool  $strict
3143
     *
3144
     * @return bool
3145
     */
3146 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // A string consisting solely of "0" and "1" counts as binary.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        $fileType = self::get_file_type($data);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // More than a quarter NUL bytes counts as binary.
        $byteCount = \strlen($data);
        $nulCount = \substr_count($data, "\x0", 0, $byteCount);
        if (($nulCount / $byteCount) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        // Strict mode additionally asks fileinfo for the MIME encoding.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $mimeEncoding && $mimeEncoding === 'binary';
    }
3182
3183
    /**
3184
     * Check if the file is binary.
3185
     *
3186
     * @param string $file
3187
     *
3188
     * @return bool
3189
     */
3190 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // Only the first 512 bytes are sampled.
        $head = '';
        if (\is_resource($handle)) {
            $head = \fread($handle, 512);
            \fclose($handle);
        }

        // Nothing read (unopenable or empty file): not binary.
        return $head === '' ? false : self::is_binary($head, true);
    }
3207
3208
    /**
3209
     * Returns true if the string contains only whitespace chars, false otherwise.
3210
     *
3211
     * @param string $str
3212
     *
3213
     * @return bool
3214
     *              Whether or not $str contains only whitespace characters
3215
     */
3216 15
    public static function is_blank(string $str): bool
    {
        // Zero or more whitespace chars, anchored at both ends.
        $pattern = '^[[:space:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3225
3226
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is exactly one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise (also for non-string input)
     */
    public static function is_bom($str): bool
    {
        // The known BOMs are the keys of self::$BOM, so a key lookup is enough.
        // The previous loop iterated the static table by reference although nothing was modified.
        // The "is_string" guard keeps the old strict comparison semantics for non-string input.
        return \is_string($str) && \array_key_exists($str, self::$BOM);
    }
3247
3248
    /**
     * Determine whether the given value is considered to be empty.
     *
     * This follows the rules of PHP's "empty()": every value that converts to
     * boolean FALSE (e.g. '', '0', 0, null, []) counts as empty.
     *
     * @param mixed $str <p>The value to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is empty in the sense of "empty()", <strong>false</strong> otherwise
     */
    public static function is_empty($str): bool
    {
        // the parameter always exists, so "empty()" boils down to a falsiness check
        return !$str;
    }
3262
3263
    /**
     * Tells whether a string is made up of hexadecimal digits only.
     *
     * The whole string is matched against the POSIX class "[[:xdigit:]]" with a "*" quantifier,
     * so an empty string is accepted as well.
     *
     * @param string $str <p>The string to inspect.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains only hexadecimal chars, <strong>false</strong> otherwise
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // without mbstring the class' own pattern matcher is used
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3280
3281
    /**
     * Check if the string contains at least one html-tag, e.g. "<lall>".
     *
     * Opening, closing and self-closing tags (with or without attributes) are recognized.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if a tag was found, <strong>false</strong> otherwise (also for an empty string)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

        // "preg_match()" returns 1 only when the pattern matched
        return \preg_match($tagPattern, $str) === 1;
    }
3301
3302
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via "UTF8::json_decode()"; it counts as JSON when decoding
     * produced a usable value and "json_last_error()" reports no error.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if "ext-json" is not available
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a decoded NULL is only accepted when the input itself spells "null"
        $decodingFailed = $decoded === null && \strtoupper($str) !== 'NULL';
        if ($decodingFailed) {
            return false;
        }

        $isArrayOrObject = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isArrayOrObject) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3338
3339
    /**
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * The whole string is matched against the POSIX class "[[:lower:]]" with a "*" quantifier,
     * so an empty string is accepted as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match('^[[:lower:]]*$', $str)
            : self::str_matches_pattern($str, '^[[:lower:]]*$');
    }
3353
3354
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done by actually unserializing the string. Classes are never
     * instantiated while doing so ("allowed_classes" is disabled), so serialized
     * objects in untrusted input cannot trigger code of the named class.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" unserializes to false, which cannot be told apart from the failure value
        if ($str === 'b:0;') {
            return true;
        }

        // Serialized objects still count as "serialized": they are restored as
        // "__PHP_Incomplete_Class" instead of the real class.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3373
3374
    /**
     * Returns true if the string contains only upper case chars, false otherwise.
     *
     * The whole string is matched against the POSIX class "[[:upper:]]" with a "*" quantifier,
     * so an empty string is accepted as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match('^[[:upper:]]*$', $str)
            : self::str_matches_pattern($str, '^[[:upper:]]*$');
    }
3392
3393
    /**
     * Check if the string is UTF-16.
     *
     * For both byte orders the string (without BOM) is converted to UTF-8 and back again.
     * If that round trip is stable, the chars of the converted string are compared
     * with the char statistic of the input. The byte order with more hits wins.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>When <strong>true</strong>, input that is not
     *                                     detected as binary by "UTF8::is_binary()" is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16 (or if both byte orders score equally),<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of hits per byte order
        $hits = [];
        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $hits[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // the conversion must survive a round trip, otherwise this byte order is no candidate
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the statistic of the input is the same for both byte orders, so it is computed lazily
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are of interest. Iterate by value: the result of a
            // function call is a temporary and must not be iterated by reference.
            foreach (self::count_chars($test3) as $test3char => $test3charCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        if ($hits['UTF-16BE'] !== $hits['UTF-16LE']) {
            return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
        }

        return false;
    }
3470
3471
    /**
     * Check if the string is UTF-32.
     *
     * For both byte orders the string (without BOM) is converted to UTF-8 and back again.
     * If that round trip is stable, the chars of the converted string are compared
     * with the char statistic of the input. The byte order with more hits wins.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>When <strong>true</strong>, input that is not
     *                                     detected as binary by "UTF8::is_binary()" is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32 (or if both byte orders score equally),<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of hits per byte order
        $hits = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $hits[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // the conversion must survive a round trip, otherwise this byte order is no candidate
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the statistic of the input is the same for both byte orders, so it is computed lazily
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are of interest. Iterate by value: the result of a
            // function call is a temporary and must not be iterated by reference.
            foreach (self::count_chars($test3) as $test3char => $test3charCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        if ($hits['UTF-32BE'] !== $hits['UTF-32LE']) {
            return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
        }

        return false;
    }
3548
3549
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * Every known emoji value is replaced by its key from the emoji mapping.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           when <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the key set: the reversible one or the default one
        $emojiKeys = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, (array) $emojiKeys, $str);
    }
3577
3578
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * Every known key from the emoji mapping is replaced by its emoji value.
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the key set: the reversible one or the default one
        $emojiKeys = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) $emojiKeys, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
3606
3607
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * Arrays are checked element by element (recursively); the first invalid element
     * makes the whole check fail. Other input is cast to string once before it is checked.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *              <strong>true</strong> for an empty string and for valid UTF-8, <strong>false</strong> otherwise
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // nothing is modified here, so the elements are iterated by value
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Cast once, so that the length calculation and the byte access below
        // always work on the very same string value.
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut uses the /u modifier, but it is only taken when
        // pcre_utf8_support() is NOT true - presumably the condition is inverted;
        // confirm against pcre_utf8_support() before changing it.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence: mUcs4 now contains the final
                // Unicode code point.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        return true;
    }
3762
3763
    /**
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" first and is then handed
     * over to PHP's native "json_decode()".
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, passed on unchanged to "json_decode()".
     *                        </p>
     *
     * @throws \RuntimeException if "ext-json" is not available
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filteredJson, $assoc, $depth, $options);
    }
3813
3814
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" first and is then handed
     * over to PHP's native "json_encode()".
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>),
     *                       passed on unchanged to "json_encode()".
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if "ext-json" is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filteredValue, $options, $depth);
    }
3863
3864
    /**
     * Checks whether JSON is available on the server.
     *
     * The check looks for the native function "json_decode".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3874
3875
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without a language and without the "keep string length" option the
     * native "mb_" functions are used directly; every other combination goes through
     * "UTF8::strtolower()".
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $firstChar = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower($firstChar, $encoding, false, $lang, $tryToKeepStringLength) . $rest;
        }

        $rest = (string) \mb_substr($str, 1);
        $firstChar = (string) \mb_substr($str, 0, 1);

        // the plain "mb_" function is enough when no special handling was requested
        if ($lang === null && $tryToKeepStringLength === false) {
            return \mb_strtolower($firstChar) . $rest;
        }

        return self::strtolower($firstChar, $encoding, false, $lang, $tryToKeepStringLength) . $rest;
    }
3931
3932
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCased = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowerCased;
    }
3954
3955
    /**
     * Lowercase the first character of every word in the string.
     *
     * The string is split via "UTF8::str_to_words()", each non-empty part that is not listed in
     * $exceptions is passed through "UTF8::lcfirst()" and the parts are joined again without a separator.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // compare against the empty string explicitly: a loose check would also swallow the input "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = $exceptions !== [];

        // write back by index instead of by reference, so that no dangling reference is left behind
        foreach ($words as $index => $word) {
            // falsy parts ("" and "0") have nothing to lowercase
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
4001
4002
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCased = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowerCased;
    }
4024
4025
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * Uses "mb_ereg_replace()" when mbstring support is flagged in self::$SUPPORT,
     * otherwise falls back to "UTF8::regex_replace()".
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string strips whitespace</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // "\A" anchors at the very start of the subject in both regex engines used below,
        // whereas "^" is a line anchor in the Ruby syntax used by the mb_ereg functions.
        if ($chars !== null && $chars !== '') {
            // explicit comparison instead of a truthiness check, so that $chars === '0' is honoured
            $pattern = '\A[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '\A[\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4053
4054
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are inspected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null when no code points were found
     */
    public static function max($arg)
    {
        $subject = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codePointList = self::codepoints($subject, false);
        if (\count($codePointList) < 1) {
            return null;
        }

        return self::chr(\max($codePointList));
    }
4076
4077
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character sizes come from "UTF8::chr_size_list()".
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte lengths of the given chars, 0 if the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizeList = self::chr_size_list($str);

        return \count($sizeList) > 0 ? (int) \max($sizeList) : 0;
    }
4094
4095
    /**
     * Reports whether the "mbstring" extension is loaded in the running PHP process.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4105
4106
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are inspected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code points were found
     */
    public static function min($arg)
    {
        $subject = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codePointList = self::codepoints($subject, false);
        if (\count($codePointList) < 1) {
            return null;
        }

        return self::chr(\min($codePointList));
    }
4128
4129
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * Both arguments are forwarded unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4145
4146
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: a few hard-coded names, the per-request result cache, the list of known
     * encodings (self::$ENCODINGS) and finally an alias table that is looked up with the upper-cased
     * name stripped of everything but letters, digits and whitespace. A name that matches nothing is
     * returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        // results of the expensive lookups below, keyed by the name as it was passed in
        static $RESOLVED_NAMES = [];

        $name = (string) $encoding;

        // an empty name (and the falsy string "0") resolves to the fallback
        if (!$name) {
            return $fallback;
        }

        if (\in_array($name, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($name, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($name, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($name, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($RESOLVED_NAMES[$name])) {
            return $RESOLVED_NAMES[$name];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known encoding name: keep it as it is
        if (\in_array($name, self::$ENCODINGS, true)) {
            $RESOLVED_NAMES[$name] = $name;

            return $name;
        }

        $upperName = \strtoupper($name);
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/', '', $upperName);

        $aliases = [
            // ISO-8859-1: Western European
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1',
            // ISO-8859-2: Central European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2',
            // ISO-8859-3: Southern European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3',
            // ISO-8859-4: Northern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4',
            // ISO-8859-5: Cyrillic
            'ISO88595'    => 'ISO-8859-5',
            // ISO-8859-6: Arabic
            'ISO88596'    => 'ISO-8859-6',
            // ISO-8859-7: Greek
            'ISO88597'    => 'ISO-8859-7',
            // ISO-8859-8: Hebrew
            'ISO88598'    => 'ISO-8859-8',
            // ISO-8859-9: Turkish
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9',
            // ISO-8859-11: Thai
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11',
            // ISO-8859-10: Nordic
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10',
            // ISO-8859-13: Baltic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13',
            // ISO-8859-14: Celtic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14',
            // ISO-8859-15: Western European (with some extra chars e.g. €)
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15',
            // ISO-8859-16: Southeast European
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16',
            // Windows code pages
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            // Unicode transformation formats
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            // raw bytes
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // without a matching alias the upper-cased input is the result
        $normalized = !empty($aliases[$aliasKey]) ? $aliases[$aliasKey] : $upperName;

        $RESOLVED_NAMES[$name] = $normalized;

        return $normalized;
    }
4289
4290
    /**
     * Standardize line ending to unix-like.
     *
     * Every "\r\n" pair is turned into "\n" first, afterwards every remaining "\r" as well.
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        $withoutCrLf = \str_replace("\r\n", "\n", $str);

        return \str_replace("\r", "\n", $withoutCrLf);
    }
4301
4302
    /**
     * Normalize some MS Word special characters.
     *
     * Typographic quotes, angle quotes, dashes and the ellipsis are replaced by their plain ASCII
     * counterparts; everything else is left as it is.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        // applied one after another, in the order listed above
        foreach ($replacements as $sequence => $ascii) {
            $str = \str_replace($sequence, $ascii, $str);
        }

        return $str;
    }
4353
4354
    /**
     * Normalize the whitespace.
     *
     * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by a plain space and,
     * unless requested otherwise, every entry of self::$BIDI_UNI_CODE_CONTROLS_TABLE is removed.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        if ($str === '') {
            return '';
        }

        // search lists are built once per request: one whitespace list per $keepNonBreakingSpace value
        static $WHITESPACE_LISTS = [];
        static $BIDI_CONTROL_LIST = null;

        $listKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_LISTS[$listKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_LISTS[$listKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_CONTROL_LIST === null) {
                $BIDI_CONTROL_LIST = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // the bidi controls are dropped before the whitespace is unified
            $str = \str_replace($BIDI_CONTROL_LIST, '', $str);
        }

        return \str_replace($WHITESPACE_LISTS[$listKey], ' ', $str);
    }
4395
4396
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: the per-request result cache, the self::$ORD table, "IntlChar::ord()" (when
     * flagged as available in self::$SUPPORT) and finally a manual decoding of the leading bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CODE_POINT_CACHE = [];

        $char = (string) $chr;

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache key is built from the input as given, before any re-encoding
        $memoKey = $char . $encoding;
        if (isset($CODE_POINT_CACHE[$memoKey]) === true) {
            return $CODE_POINT_CACHE[$memoKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$char])) {
            $CODE_POINT_CACHE[$memoKey] = self::$ORD[$char];

            return $CODE_POINT_CACHE[$memoKey];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($char);
            // a falsy result (null or 0) falls through to the manual decoding
            if ($intlCode) {
                $CODE_POINT_CACHE[$memoKey] = $intlCode;

                return $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // at most the first four bytes are inspected; "unpack()" numbers them starting at 1
        $bytes = \unpack('C*', (string) \substr($char, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            $codePoint = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            $codePoint = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            $codePoint = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or no byte at all => 0)
            $codePoint = $lead;
        }

        $CODE_POINT_CACHE[$memoKey] = $codePoint;

        return $codePoint;
    }
4474
4475
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()" this method never (re-)places variables in the current scope;
     *          the result is always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $parsedSuccessfully = true;

        if (self::$SUPPORT['mbstring'] === true) {
            $parsedSuccessfully = \mb_parse_str($str, $result) !== false;
        } else {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);
        }

        // an empty result counts as a failure as well
        return $parsedSuccessfully && $result !== [];
    }
4507
4508
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty subject and
     * suppresses any message that this may emit.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probeResult = @\preg_match('//u', '');

        return (bool) $probeResult;
    }
4520
4521
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is interpreted in this order: a string of decimal digits, a string of
     * hexadecimal digits, otherwise a character whose code point is taken via "UTF8::ord()".
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if ctype support is flagged as missing in self::$SUPPORT
     *
     * @return string[] an empty array if a boundary is falsy or resolves to a falsy code point
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // turns one boundary into its code point
        $toCodePoint = static function ($boundary): int {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit((string) $boundary)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($boundary)) {
                return (int) self::hex_to_int($boundary);
            }

            return self::ord($boundary);
        };

        $first = $toCodePoint($var1);
        if (!$first) {
            return [];
        }

        $last = $toCodePoint($var2);
        if (!$last) {
            return [];
        }

        $characters = [];
        foreach (\range($first, $last) as $codePoint) {
            $characters[] = (string) self::chr($codePoint);
        }

        return $characters;
    }
4572
4573
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * With $multi_decode enabled the decoding steps are repeated until the string stops changing.
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // "%uXXXX" escapes are rewritten into hexadecimal html entities
        $unicodeEscape = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($unicodeEscape, $str)) {
            $str = (string) \preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
        }

        if ($multi_decode !== true) {
            return $str;
        }

        $entityFlags = \ENT_QUOTES | \ENT_HTML5;

        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $withoutEntities = self::html_entity_decode(self::to_utf8($str), $entityFlags);
            $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
        } while ($previous !== $str);

        return $str;
    }
4625
4626
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in the delimiter and always executed with the "u" modifier,
     * followed by the given options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used; 'msr' is treated as 'ms'.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/' (also used for a falsy value)</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = "{$boundary}{$pattern}{$boundary}u{$modifiers}";

        return (string) \preg_replace($regex, $replacement, $str);
    }
4659
4660
    /**
     * alias for "UTF8::remove_bom()"
     *
     * The argument is forwarded unchanged.
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4675
4676
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM is checked in turn against the current start of the string, so
     * several different byte order marks in a row are stripped as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        foreach (self::$BOM as $bomString => $bomByteLength) {
            // only a BOM at the very beginning is of interest
            if (\strpos($str, $bomString, 0) !== 0) {
                continue;
            }

            // cut off the number of bytes registered for this BOM
            $remainder = \substr($str, $bomByteLength);
            if ($remainder === false) {
                return '';
            }

            $str = (string) $remainder;
        }

        return $str;
    }
4705
4706
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated occurrences of a search string is collapsed into a single
     * occurrence. Several search strings are processed one after another, in the given order.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $needles = \is_string($what) === true ? [$what] : $what;

        // anything that is neither a string nor an array leaves the input untouched
        if (\is_array($needles) === false) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($needles as $needle) {
            // an empty search string has no duplicates to collapse
            if ($needle === '') {
                continue;
            }

            // The replacement is the captured group and not the raw search string: a search string
            // such as '$0' or '\0' would otherwise be interpreted as a back-reference.
            $str = (string) \preg_replace('/(' . \preg_quote($needle, '/') . ')+/', '$1', $str);
        }

        return $str;
    }
4729
4730
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str
     * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which should
     *                              not be stripped. Default: '' (empty string)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4744
4745
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced only once. The "<br" alternative matches
     * case-insensitively and runs up to the next ">".
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // No leading "/" in front of "\r\n": with it, only "/\r\n" matched the first alternative
        // and a slash in front of a line break was removed together with the break.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4757
4758
    /**
4759
     * Remove invisible characters from a string.
4760
     *
4761
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
4762
     *
4763
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4764
     *
4765
     * @param string $str
4766
     * @param bool   $url_encoded
4767
     * @param string $replacement
4768
     *
4769
     * @return string
4770
     */
4771
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // raw control bytes: 00-08, 11, 12, 14-31, 127
        // (horizontal tab, newline and carriage return are not part of the class)
        $rawControlChars = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S';

        if ($url_encoded) {
            $patterns = [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
                $rawControlChars,
            ];
        } else {
            $patterns = [$rawControlChars];
        }

        // A removal can glue the surrounding text together into a new match
        // (e.g. "%0%000"), so the replacement is repeated until a full pass
        // does not replace anything.
        $replaced = 0;
        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replaced);
        } while ($replaced !== 0);

        return $str;
    }
4791
4792
    /**
4793
     * Returns a new string with the prefix $substring removed, if present.
4794
     *
4795
     * @param string $str
4796
     * @param string $substring <p>The prefix to remove.</p>
4797
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
4798
     *
4799
     * @return string string without the prefix $substring
4800
     */
4801
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a loose truthiness check would treat
        // the prefix "0" like an empty prefix and never remove it.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        // fast path: plain "mb_*" calls with their default encoding
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // cut off as many characters (in the given encoding) as the prefix has
        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4823
4824
    /**
4825
     * Returns a new string with the suffix $substring removed, if present.
4826
     *
4827
     * @param string $str
4828
     * @param string $substring <p>The suffix to remove.</p>
4829
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
4830
     *
4831
     * @return string string having a $str without the suffix $substring
4832
     */
4833
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a loose truthiness check would treat
        // the suffix "0" like an empty suffix and never remove it.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        // fast path: plain "mb_*" calls with their default encoding
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // keep everything except the last N characters, N = length of the suffix
        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4856
4857
    /**
4858
     * Replaces all occurrences of $search in $str by $replacement.
4859
     *
4860
     * @param string $str           <p>The input string.</p>
4861
     * @param string $search        <p>The needle to search for.</p>
4862
     * @param string $replacement   <p>The string to replace with.</p>
4863
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4864
     *
4865
     * @return string string after the replacements
4866
     */
4867
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // case-insensitive replacing is delegated to the class' own "str_ireplace()"
        if ($caseSensitive === false) {
            return self::str_ireplace($search, $replacement, $str);
        }

        // case-sensitive: the native byte-wise replacement is used
        return \str_replace($search, $replacement, $str);
    }
4879
4880
    /**
4881
     * Replaces all occurrences of $search in $str by $replacement.
4882
     *
4883
     * @param string       $str           <p>The input string.</p>
4884
     * @param array        $search        <p>The elements to search for.</p>
4885
     * @param array|string $replacement   <p>The string to replace with.</p>
4886
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4887
     *
4888
     * @return string string after the replacements
4889
     */
4890
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        // case-insensitive replacing is delegated to the class' own "str_ireplace()"
        if ($caseSensitive === false) {
            return self::str_ireplace($search, $replacement, $str);
        }

        // case-sensitive: "str_replace()" accepts the list of needles directly
        return \str_replace($search, $replacement, $str);
    }
4902
4903
    /**
4904
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4905
     *
4906
     * @param string $str                <p>The input string</p>
4907
     * @param string $replacementChar    <p>The replacement character.</p>
4908
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
4909
     *
4910
     * @return string
4911
     */
4912
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            // "mb_substitute_character()" is given the keyword 'none' when the
            // replacement is an empty string
            $substitute = $replacementChar === '' ? 'none' : $replacementChar;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // temporarily switch the substitute character, re-encode UTF-8 -> UTF-8
            // and restore the previous setting afterwards
            $previousSubstitute = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previousSubstitute);
        }

        // finally replace the replacement character (U+FFFD) itself
        $needles = [
            "\xEF\xBF\xBD",
            '�',
        ];
        $replacements = [
            $replacementChar,
            $replacementChar,
        ];

        return \str_replace($needles, $replacements, $str);
    }
4949
4950
    /**
4951
     * Strip whitespace or other characters from end of a UTF-8 string.
4952
     *
4953
     * @param string      $str   <p>The string to be trimmed.</p>
4954
     * @param string|null $chars <p>Optional characters to be stripped.</p>
4955
     *
4956
     * @return string the string with unwanted characters stripped from the right
4957
     */
4958
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Check for null / '' explicitly: with a loose truthiness check the
        // char list "0" would be ignored and whitespace would be trimmed instead.
        if ($chars !== null && $chars !== '') {
            // the given chars become a character class, anchored at the end of the string
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            // default: strip trailing whitespace
            $pattern = '[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // without "mbstring" the class' own regex helper is used, with "/" as delimiter
        return self::regex_replace($str, $pattern, '', '', '/');
    }
4978
4979
    /**
4980
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
4981
     */
4982
    public static function showSupport()
    {
        // one "<key> - <value>" line per entry of the support map, wrapped in <pre>
        echo '<pre>';

        foreach (self::$SUPPORT as $feature => $state) {
            $line = $feature . ' - ' . \print_r($state, true) . "\n<br>";

            echo $line;
        }

        echo '</pre>';
    }
4991
4992
    /**
4993
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4994
     *
4995
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
4996
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
4997
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
4998
     *
4999
     * @return string the HTML numbered entity
5000
     */
5001
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        // nothing to encode
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched, but only if the caller asked for it
        // (the ASCII check is skipped entirely otherwise)
        $keepAsIs = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($keepAsIs) {
            return $char;
        }

        // build the numbered entity from the value returned by "ord()"
        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
5017
5018
    /**
5019
     * @param string $str
5020
     * @param int    $tabLength
5021
     *
5022
     * @return string
5023
     */
5024
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // A tab cannot stand for zero or fewer spaces: return the input unchanged
        // instead of handing a negative count to "str_repeat()".
        if ($tabLength <= 0) {
            return $str;
        }

        // every run of exactly $tabLength spaces becomes one tab
        return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
    }
5036
5037
    /**
5038
     * Convert a string to an array of Unicode characters.
5039
     *
5040
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
5041
     * @param int                       $length             [optional] <p>Max character length of each array
5042
     *                                                      element.</p>
5043
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
5044
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
5045
     *                                                      "mb_substr"</p>
5046
     *
5047
     * @return array
5048
     *               <p>An array containing chunks of the input.</p>
5049
     */
5050
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // an array is split element by element (recursively), the keys are kept
        if (\is_array($str) === true) {
            foreach ($str as &$v) {
                $v = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }
            // break the reference to the last element
            unset($v);

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short input: one "mb_substr()" call per character
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer input: let PCRE collect all characters in one call
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: walk over the bytes and group them by the bit pattern
            // of the lead byte; bytes that match none of the patterns are skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a 2-byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a 3-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a 4-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // group the single characters into chunks and glue every chunk together
            $ret = \array_chunk($ret, $length);

            return \array_map(
                // The chunk is taken by value: "array_map()" passes values, and a
                // by-reference parameter here triggers a warning on PHP 8.
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
5176
5177
    /**
5178
     * Returns a camelCase version of the string. Trims surrounding spaces,
5179
     * capitalizes letters following digits, spaces, dashes and underscores,
5180
     * and removes spaces, dashes, as well as underscores.
5181
     *
5182
     * @param string      $str                   <p>The input string.</p>
5183
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
5184
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
5185
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
5186
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
5187
     *
5188
     * @return string
5189
     */
5190
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // trim, lower-case the first character, then drop leading dashes / underscores
        $str = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native "mb_strtoupper()" is only used when no language specific
        // handling and no length preservation was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        // shared upper-casing step of both callbacks below
        $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // 1.) remove runs of dashes, underscores and whitespace, upper-case the character after them
        $str = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                // the group is missing when the run is at the very end of the string
                if (!isset($match[1])) {
                    return '';
                }

                return $toUpper($match[1], false);
            },
            $str
        );

        // 2.) upper-case every run of digits together with the character after it
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $str
        );
    }
5262
5263
    /**
5264
     * Returns the string with the first letter of each word capitalized,
5265
     * except for when the word is a name which shouldn't be capitalized.
5266
     *
5267
     * @param string $str
5268
     *
5269
     * @return string string with $str capitalized
5270
     */
5271
    public static function str_capitalize_name(string $str): string
    {
        // collapse the whitespace first, then run the helper once with a space
        // and once with a dash as second argument
        $collapsed = self::collapse_whitespace($str);
        $afterSpacePass = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($afterSpacePass, '-');
    }
5281
5282
    /**
5283
     * Returns true if the string contains $needle, false otherwise. By default
5284
     * the comparison is case-sensitive, but can be made insensitive by setting
5285
     * $caseSensitive to false.
5286
     *
5287
     * @param string $haystack      <p>The input string.</p>
5288
     * @param string $needle        <p>Substring to look for.</p>
5289
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5290
     *
5291
     * @return bool whether or not $haystack contains $needle
5292
     */
5293
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // byte-wise search for the case-sensitive check,
        // "mb_stripos()" for the case-insensitive one
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        // position 0 is a valid hit, only "false" means "not found"
        return $position !== false;
    }
5304
5305
    /**
5306
     * Returns true if the string contains all $needles, false otherwise. By
5307
     * default the comparison is case-sensitive, but can be made insensitive by
5308
     * setting $caseSensitive to false.
5309
     *
5310
     * @param string $haystack      <p>The input string.</p>
5311
     * @param array  $needles       <p>SubStrings to look for.</p>
5312
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5313
     *
5314
     * @return bool whether or not $haystack contains all $needles
5315
     */
5316
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // an empty needle can never be "contained"
            // (compared with '' so that the needle "0" is still searched for)
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // One missing needle is enough to fail; a hit must NOT end the loop,
            // otherwise only the first needle would ever be checked.
            if ($position === false) {
                return false;
            }
        }

        // every needle was found
        return true;
    }
5340
5341
    /**
5342
     * Returns true if the string contains any $needles, false otherwise. By
5343
     * default the comparison is case-sensitive, but can be made insensitive by
5344
     * setting $caseSensitive to false.
5345
     *
5346
     * @param string $haystack      <p>The input string.</p>
5347
     * @param array  $needles       <p>SubStrings to look for.</p>
5348
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5349
     *
5350
     * @return bool
5351
     *              Whether or not $haystack contains any of the $needles
5352
     */
5353
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // An empty needle cannot match, but it must not hide the needles after it
            // (compared with '' so that the needle "0" is still searched for).
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // The first hit is enough; a miss must NOT end the loop,
            // otherwise only the first needle would ever be checked.
            if ($position !== false) {
                return true;
            }
        }

        // none of the needles was found
        return false;
    }
5377
5378
    /**
5379
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
5380
     * inserted before uppercase characters (with the exception of the first
5381
     * character of the string), and in place of spaces as well as underscores.
5382
     *
5383
     * @param string $str      <p>The input string.</p>
5384
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5385
     *
5386
     * @return string
5387
     */
5388
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // "dasherize" is "str_delimit()" with a dash as delimiter
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5392
5393
    /**
5394
     * Returns a lowercase and trimmed string separated by the given delimiter.
5395
     * Delimiters are inserted before uppercase characters (with the exception
5396
     * of the first character of the string), and in place of spaces, dashes,
5397
     * and underscores. Alpha delimiters are not converted to lowercase.
5398
     *
5399
     * @param string      $str                   <p>The input string.</p>
5400
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
5401
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
5402
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
5403
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
5404
     *                                           tr</p>
5405
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
5406
     *                                           ß</p>
5407
     *
5408
     * @return string
5409
     */
5410
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // decides between the "mb_ereg_*" and the "preg_*" variant of both regex steps
        $hasMbString = self::$SUPPORT['mbstring'] === true;

        // 1.) put a dash in front of every A-Z that is not at a word boundary
        $trimmed = \trim($str);
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\B([A-Z])', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\B([A-Z])/u', '-\1', $trimmed);
        }

        // 2.) lower-case the string: "mb_strtolower()" directly if no special
        //     handling was requested, the class' own "strtolower()" otherwise
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;
        if ($useMbFunction === true && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // 3.) replace every run of dashes, underscores and whitespace with the delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[-_\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5444
5445
    /**
5446
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
5447
     *
5448
     * @param string $str <p>The input string.</p>
5449
     *
5450
     * @return false|string
5451
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
5452
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
5453
     */
5454
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            $utf16 = self::is_utf16($str, false);
            if ($utf16 === 1 || $utf16 === 2) {
                return $utf16 === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            $utf32 = self::is_utf32($str, false);
            if ($utf32 === 1 || $utf32 === 2) {
                return $utf32 === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // candidate encodings, in the order they are handed to "mb_detect_encoding()"
            $detectOrder = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $detectOrder, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        // the encoding list is loaded once and then kept in the static property
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // the first encoding that converts the string onto itself wins
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5566
5567
    /**
5568
     * Check if the string ends with the given substring.
5569
     *
5570
     * @param string $haystack <p>The string to search in.</p>
5571
     * @param string $needle   <p>The substring to search for.</p>
5572
     *
5573
     * @return bool
5574
     */
5575
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // take as many bytes from the end of the haystack as the needle is long ...
        $needleLength = \strlen($needle);
        $tail = \substr($haystack, -$needleLength);

        // ... and compare them byte-wise with the needle
        return $tail === $needle;
    }
5579
5580
    /**
5581
     * Returns true if the string ends with any of $substrings, false otherwise.
5582
     *
5583
     * - case-sensitive
5584
     *
5585
     * @param string   $str        <p>The input string.</p>
5586
     * @param string[] $substrings <p>Substrings to look for.</p>
5587
     *
5588
     * @return bool whether or not $str ends with any of the $substrings
5589
     */
5590
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply never enters the loop
        foreach ($substrings as $candidate) {
            // byte-wise comparison of the string's tail with the candidate
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5604
5605
    /**
     * Guarantee that the string starts with $substring, prepending it when
     * it is missing.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $alreadyPrefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5626
5627
    /**
     * Guarantee that the string finishes with $substring, appending it when
     * it is missing.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Nothing to do only when both strings are non-empty and the suffix is already there.
        if (
            $str !== ''
            &&
            $substring !== ''
            &&
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return $str;
        }

        return $str . $substring;
    }
5649
5650
    /**
     * Makes an identifier human readable: removes every '_id', turns the
     * remaining underscores into spaces, trims the result and passes it
     * through self::ucfirst().
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        $search = ['_id', '_'];
        $replace = ['', ' '];

        $withoutUnderscores = \str_replace($search, $replace, $str);

        return self::ucfirst(\trim($withoutUnderscores));
    }
5674
5675
    /**
     * Check if the string ends with the given substring, ignoring case.
     *
     * An empty $haystack or an empty $needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($haystack === '') {
            return false;
        }

        if ($needle === '') {
            return false;
        }

        // Compare the trailing bytes of the haystack against the needle.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5691
5692
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty list of substrings never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is modified, so a by-reference loop would
        // only leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5716
5717
    /**
     * Case-insensitive lookup of the first occurrence of $needle.
     *
     * Thin alias that forwards all arguments to self::stripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5743
5744
    /**
     * Case-insensitive lookup of the last occurrence of $needle.
     *
     * Thin alias that forwards all arguments to self::strripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5771
5772
    /**
     * Case-sensitive lookup of the first occurrence of $needle.
     *
     * Thin alias that forwards all arguments to self::strpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5798
5799
    /**
     * Case-sensitive lookup of the last occurrence of $needle.
     *
     * Thin alias that forwards all arguments to self::strrpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5826
5827
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged when $index lies beyond its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8: use the native mbstring functions directly.
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
5866
5867
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * The search terms are turned into quoted, case-insensitive unicode
     * patterns and applied via preg_replace(). Replacements are inserted
     * literally: "$" and "\" in a replacement are escaped, so they are never
     * interpreted as regex back-references.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search term never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement string, or an array of replacement
     *                       strings paired with the search terms by position.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Build one pattern per search term (keys and order are preserved).
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // pattern that can never match: an empty needle replaces nothing
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // preg_replace() would expand "$1", "\1" or "\\" inside the replacement;
        // escape them so the replacement text is taken literally.
        $escape = static function ($replacement): string {
            return \addcslashes((string) $replacement, '\\$');
        };
        $replace = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replace, $subject, -1, $count);
    }
5911
5912
    /**
     * Replaces $search at the very start of the string with $replacement
     * (the comparison ignores case via stripos()).
     *
     * With an empty $search the $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers an empty $str: the result is just the replacement
            return $str . $replacement;
        }

        $startsWithSearch = \stripos($str, $search) === 0;
        if (!$startsWithSearch) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5943
5944
    /**
     * Replaces $search at the very end of the string with $replacement
     * (the comparison ignores case via stripos()).
     *
     * With an empty $search the $replacement is appended to $str. When
     * $search is longer than $str (including an empty $str) nothing can match
     * and $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers an empty $str: the result is just the replacement
            return $str . $replacement;
        }

        // Byte position at which $search would have to start to be the ending.
        $offset = \strlen($str) - \strlen($search);
        if ($offset < 0) {
            // A negative offset outside of the string makes stripos() emit a
            // warning (PHP 7) or throw a ValueError (PHP 8).
            return $str;
        }

        if (\stripos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5975
5976
    /**
     * Check if the string starts with the given substring, ignoring case.
     *
     * An empty $haystack or an empty $needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($haystack === '') {
            return false;
        }

        if ($needle === '') {
            return false;
        }

        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
5992
5993
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty list of substrings never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // Iterate by value: nothing is modified, so a by-reference loop would
        // only leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6021
6022
    /**
     * Gets the substring after the first (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the separator up in the same encoding that is used for cutting
        // below, so that the offset and the substring agree.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6056
6057
    /**
     * Gets the substring after the last (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the separator up in the same encoding that is used for cutting
        // below, so that the offset and the substring agree.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6091
6092
    /**
     * Gets the substring before the first (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the separator up in the same encoding that is used for cutting
        // below, so that the offset and the substring agree.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6118
6119
    /**
     * Gets the substring before the last (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strripos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        // UTF-8: use the native mbstring functions directly.
        $position = \mb_strripos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
6150
6151
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * Delegates to self::stristr() and maps a "not found" result (false) to
     * an empty string; empty input also yields an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6187
6188
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * Delegates to self::strrichr() and maps a "not found" result (false) to
     * an empty string; empty input also yields an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6219
6220
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        $start = -$n;

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, $start, null, $encoding);
        }

        return (string) \mb_substr($str, $start);
    }
6243
6244
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned unchanged. Longer
     * strings are cut so that the kept part plus $strAddOn is $length
     * characters long. If $strAddOn alone is as long as or longer than
     * $length, nothing of $str is kept and only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length: it would be counted from the end
            // of the string and keep far more than $length characters.
            $keep = \max(0, $length - (int) \mb_strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamping as in the UTF-8 branch
        $keep = \max(0, $length - (int) self::strlen($strAddOn));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6281
6282
    /**
     * Limit the number of characters in a string, cutting at a word boundary.
     *
     * Strings that already fit into $length are returned unchanged. Otherwise
     * the string is cut to $length characters and the trailing (possibly
     * partial) word is dropped before $strAddOn is appended. If no space is
     * left to cut at, the first $length - 1 characters are used instead.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the limit falls exactly on a space: cut right before it
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            $str = \mb_substr($str, 0, $length);

            // drop everything from the last space on (i.e. the last word)
            $lastSpace = \strrpos($str, ' ');
            $withoutLastWord = $lastSpace === false ? '' : (string) \substr($str, 0, $lastSpace);

            if ($withoutLastWord === '') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            return $withoutLastWord . $strAddOn;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the limit falls exactly on a space: cut right before it
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        $str = self::substr($str, 0, $length, $encoding);
        if ($str === false) {
            return '' . $strAddOn;
        }

        // drop everything from the last space on (i.e. the last word)
        $lastSpace = \strrpos($str, ' ');
        $withoutLastWord = $lastSpace === false ? '' : (string) \substr($str, 0, $lastSpace);

        if ($withoutLastWord === '') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        return $withoutLastWord . $strAddOn;
    }
6346
6347
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start until
     * the first difference (or the end of the shorter string).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            $position = 0;
            while ($position < $limit) {
                $char = self::substr($str, $position, 1, $encoding);
                if ($char === false || $char !== self::substr($otherStr, $position, 1, $encoding)) {
                    break;
                }

                $prefix .= $char;
                ++$position;
            }

            return $prefix;
        }

        // UTF-8: use the native mbstring functions directly.
        $limit = (int) \min(
            \mb_strlen($str),
            \mb_strlen($otherStr)
        );

        $position = 0;
        while ($position < $limit) {
            $char = \mb_substr($str, $position, 1);
            if ($char === false || $char !== \mb_substr($otherStr, $position, 1)) {
                break;
            }

            $prefix .= $char;
            ++$position;
        }

        return $prefix;
    }
6405
6406
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first in $str.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     * Only the previous and the current row of the table are kept in memory.
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest substring of $str that also occurs in $otherStr
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Re-checked on purpose: the normalized encoding may be 'UTF-8' as well.
        $isUtf8 = $encoding === 'UTF-8';

        // Single place that knows how to read one character at a position.
        $charAt = static function (string $input, int $position) use ($isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($input, $position, 1)
                : self::substr($input, $position, 1, $encoding);
        };

        // The characters of $otherStr are needed once per row: extract them only once.
        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[$j] = $charAt($otherStr, $j);
        }

        $len = 0; // length of the best match so far
        $end = 0; // 1-based end position of the best match within $str
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            // invariant for the whole row, so it is read outside of the inner loop
            $strChar = $charAt($str, $i - 1);
            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    // extend the common run that ended at the previous characters
                    $run = $previousRow[$j - 1] + 1;
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6490
6491
    /**
     * Returns the longest run of trailing characters shared by $str and $otherStr.
     *
     * The strings are compared character by character from the end; the walk
     * stops at the first mismatch or once the shorter string is exhausted.
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the common suffix, or an empty string if there is none
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        // The plain "mb_*" functions are only used when the caller asked for exactly 'UTF-8'.
        $useNativeMb = $encoding === 'UTF-8';

        if ($useNativeMb) {
            $limit = (int) \min(
                \mb_strlen($str, $encoding),
                \mb_strlen($otherStr, $encoding)
            );
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );
        }

        $suffix = '';
        $offset = 0;

        while (++$offset <= $limit) {
            $tail = $useNativeMb
                ? \mb_substr($str, -$offset, 1)
                : self::substr($str, -$offset, 1, $encoding);

            if ($tail === false) {
                break;
            }

            $otherTail = $useNativeMb
                ? \mb_substr($otherStr, -$offset, 1)
                : self::substr($otherStr, -$offset, 1, $encoding);

            if ($tail !== $otherTail) {
                break;
            }

            // We walk backwards, so every match is prepended.
            $suffix = $tail . $suffix;
        }

        return $suffix;
    }
6552
6553
    /**
     * Checks whether $str matches the supplied regular expression.
     *
     * The pattern is wrapped in "/" delimiters and compiled with the "u"
     * modifier before it is handed to preg_match().
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
6565
6566
    /**
     * Tells whether a character exists at the given index. Negative offsets
     * count backwards from the last character of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // Negative offsets reach back at most "$charCount" characters,
        // non-negative ones must point below the character count.
        return $offset < 0
            ? $charCount >= \abs($offset)
            : $charCount > $offset;
    }
6588
6589
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Throws an
     * OutOfBoundsException if the index does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the caller's encoding, so that the bounds check
        // agrees with the char_at() lookup below (and with str_offset_exists()).
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6618
6619
    /**
     * Pad a string to the given length (counted in characters) with another string.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right' or 'both'
     *
     * @return string the padded string; $str is returned untouched when $pad_length is 0,
     *                when $pad_string is empty or when $str is already longer than $pad_length
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Anything that is not already an integer must be one of the named aliases.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } elseif ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // Pick the length / cut primitives once: the plain "mb_*" functions for
        // exactly 'UTF-8', the class' own helpers for every other encoding.
        if ($encoding === 'UTF-8') {
            $measure = static function (string $text): int {
                return (int) \mb_strlen($text);
            };
            $take = static function (string $text, int $chars): string {
                return (string) \mb_substr($text, 0, $chars);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $measure = static function (string $text) use ($encoding): int {
                return (int) self::strlen($text, $encoding);
            };
            $take = static function (string $text, int $chars) use ($encoding): string {
                return (string) self::substr($text, 0, $chars, $encoding);
            };
        }

        $str_length = $measure($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        // Number of characters that have to be added in total.
        $diff = $pad_length - $str_length;

        $pre = '';
        $post = '';

        if ($pad_type === \STR_PAD_BOTH) {
            // The right side receives the extra character when $diff is odd.
            $ps_length_left = (int) \floor($diff / 2);
            $ps_length_right = (int) \ceil($diff / 2);

            $pre = $take(\str_repeat($pad_string, $ps_length_left), $ps_length_left);
            $post = $take(\str_repeat($pad_string, $ps_length_right), $ps_length_right);
        } else {
            // Repeat the pad string often enough to cover $diff, then cut it to size.
            $filler = $take(
                \str_repeat($pad_string, (int) \ceil($diff / $measure($pad_string))),
                $diff
            );

            if ($pad_type === \STR_PAD_LEFT) {
                $pre = $filler;
            } else {
                // STR_PAD_RIGHT and every unknown integer value pad on the right.
                $post = $filler;
            }
        }

        return $pre . $str . $post;
    }
6781
6782
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Shortcut for str_pad() with STR_PAD_BOTH.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6801
6802
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Shortcut for str_pad() with STR_PAD_LEFT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6821
6822
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Shortcut for str_pad() with STR_PAD_RIGHT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6841
6842
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6866
6867
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /** @psalm-suppress PossiblyNullArgument */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6905
6906
    /**
     * Replaces $search with $replacement, but only if $str starts with $search.
     *
     * An empty $search never matches a prefix; in that case $replacement is
     * appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // Covers every empty-$search case, including an empty $str
        // ('' . $replacement is just $replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        $prefixBytes = \strlen($search);

        // Byte-wise prefix comparison; an empty or too short $str simply does not match.
        if (\strncmp($str, $search, $prefixBytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefixBytes);
    }
6937
6938
    /**
     * Replaces $search with $replacement, but only if $str ends with $search.
     *
     * An empty $search never matches a suffix; in that case $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // Covers every empty-$search case, including an empty $str
        // ('' . $replacement is just $replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // A $search that is longer than $str can never be its suffix. Checking the
        // lengths first avoids asking for an offset outside of $str, which emits a
        // warning on PHP 7 and throws a ValueError on PHP 8 when done via strpos().
        if ($searchLength > \strlen($str)) {
            return $str;
        }

        // Byte-wise comparison of the last "$searchLength" bytes.
        if (\substr($str, -$searchLength) === $search) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6969
6970
    /**
     * Replace the first occurrence of "$search" in "$subject" with "$replace".
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string the subject with the first match replaced, or the untouched
     *                subject when "$search" was not found
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $offset = self::strpos($subject, $search);

        if ($offset === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $offset, $searchLength);
    }
6992
6993
    /**
     * Replace the last occurrence of "$search" in "$subject" with "$replace".
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string the subject with the last match replaced, or the untouched
     *                subject when "$search" was not found
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $offset = self::strrpos($subject, $search);

        if ($offset === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $offset, $searchLength);
    }
7017
7018
    /**
     * Shuffles all the characters in the string.
     *
     * PS: relies on shuffle(), which is not suitable for cryptographic purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // The plain "mb_*" functions are only used when the caller asked for exactly 'UTF-8'.
        $useNativeMb = $encoding === 'UTF-8';

        if ($useNativeMb) {
            $lastIndex = (int) \mb_strlen($str) - 1;
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $lastIndex = (int) self::strlen($str, $encoding) - 1;
        }

        // Shuffle the character positions, then rebuild the string in that order.
        $positions = \range(0, $lastIndex);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $shuffledStr = '';

        foreach ($positions as $position) {
            $piece = $useNativeMb
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($piece !== false) {
                $shuffledStr .= $piece;
            }
        }

        return $shuffledStr;
    }
7064
7065
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The plain "mb_*" functions are only used when the caller asked for exactly 'UTF-8'.
        $useNativeMb = $encoding === 'UTF-8';

        if (!$useNativeMb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // A non-negative end at or before the start selects nothing.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            $total = $useNativeMb
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            // No end: take everything from $start on. Negative end: count it from the end.
            $length = $end === null ? $total : $total + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($useNativeMb) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7114
7115
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every digit is surrounded by "_" and
     * every upper-case ASCII letter is lower-cased and prefixed with "_".
     * Runs of "_" are collapsed and leading / trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Digits and upper-case letters only. The former class "[\d|A-Z]" also
            // contained a literal "|" (no alternation inside a character class),
            // which wrongly inserted "_" in front of every pipe character.
            '/([\dA-Z])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // The match survives an int round-trip: it is a digit, so isolate it.
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/',        // convert spaces to "_"
                '/^\s+|\s+$/',  // trim leading & trailing spaces
                '/_+/',         // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7179
7180
    /**
     * Sort all characters according to code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // Flipping twice drops duplicate values, because array keys are unique.
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7205
7206
    /**
     * Alias for "UTF8::str_split()": all arguments are forwarded unchanged.
     *
     * @see UTF8::str_split()
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
7224
7225
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings; empty when $limit is 0 or when the split itself fails
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() reports failure with false; handle it like a failing
            // preg_split() below instead of violating the declared array return type.
            if ($parts === false) {
                return [];
            }

            if ($limit >= 0) {
                // Keep only the first "$limit" parts.
                return \array_filter(
                    $parts,
                    static function () use (&$limit): bool {
                        return --$limit >= 0;
                    }
                );
            }

            return $parts;
        }

        // Ask preg_split() for one extra part: it collects the unsplit remainder,
        // which is dropped again below so that the result is truncated.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): the pattern is passed through preg_quote() here, i.e. it is
        // matched literally, while the mbstring branch above treats it as a regex.
        // Confirm which behavior is intended.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7279
7280
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle is
     * treated as a prefix of every string.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // strpos() rejected an empty needle with a warning before PHP 8 and accepts
        // it since PHP 8; answer explicitly so the result no longer depends on the
        // PHP version.
        if ($needle === '') {
            return true;
        }

        // Only the first strlen($needle) bytes are inspected, instead of
        // searching the whole haystack for the needle.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7292
7293
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for (entries are cast to string).</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a reference is not needed
        foreach ($substrings as $substring) {
            // the cast keeps scalar entries (e.g. integers) from raising a TypeError
            // under "strict_types"
            if (self::str_starts_with($str, (string) $substring)) {
                return true;
            }
        }

        return false;
    }
7321
7322
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // fast path: plain "mb_*" calls with the default encoding
        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // skip the separator itself
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // go through the class wrapper, exactly like the sibling
        // "str_substr_*_separator()" methods, instead of handing the raw
        // $encoding value to "mb_substr()"
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7361
7362
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty input, an empty separator or a separator that is not found all
     * result in an empty string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // UTF-8 uses the native function directly, everything else the class wrapper
        $lastPos = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($lastPos === false) {
            return '';
        }

        if ($isUtf8) {
            // start right behind the separator
            return (string) \mb_substr($str, $lastPos + (int) \mb_strlen($separator));
        }

        $start = $lastPos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
7401
7402
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty input, an empty separator or a separator that is not found all
     * result in an empty string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // non-default encodings go through the class wrappers
            $firstPos = self::strpos($str, $separator, 0, $encoding);

            return $firstPos === false
                ? ''
                : (string) self::substr($str, 0, $firstPos, $encoding);
        }

        $firstPos = \mb_strpos($str, $separator);

        // everything in front of the separator
        return $firstPos === false
            ? ''
            : (string) \mb_substr($str, 0, $firstPos);
    }
7445
7446
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty input, an empty separator or a separator that is not found all
     * result in an empty string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $lastPos = \mb_strrpos($str, $separator);

            // everything in front of the last separator
            return $lastPos !== false ? (string) \mb_substr($str, 0, $lastPos) : '';
        }

        // the lookup receives the encoding as given ...
        $lastPos = self::strrpos($str, $separator, 0, $encoding);
        if ($lastPos === false) {
            return '';
        }

        // ... while the cut is done with the normalized encoding name
        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $lastPos, $normalizedEncoding);
    }
7488
7489
    /**
     * Gets the part of the string that starts at the first occurrence of the
     * "$needle" (needle included), or the part in front of it when
     * "$beforeNeedle" is true.
     *
     * An empty input, an empty needle or a needle that is not found all result
     * in an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is the default of the third "mb_strstr()" argument, so the flag
        // can be passed through as-is
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7533
7534
    /**
     * Gets the part of the string that starts at the last occurrence of the
     * "$needle" (needle included), or the part in front of it when
     * "$beforeNeedle" is true.
     *
     * An empty input, an empty needle or a needle that is not found all result
     * in an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is the default of the third "mb_strrchr()" argument, so the flag
        // can be passed through as-is
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7578
7579
    /**
     * Wraps $str in the given substring (prefix and suffix are the same text).
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
7591
7592
    /**
     * Capitalizes the first letter of every whitespace-separated word and
     * lower-cases the rest of the word. Words listed in $ignore are left untouched.
     * The input is trimmed first unless $useTrimFirst is false.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        // these two names are used as given, everything else gets normalized
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // without language / length special cases the native "mb_*" functions suffice
        $plainMbCase = $lang === null && $tryToKeepStringLength === false;
        $skipWords = $ignore ?? [];

        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $skipWords, $plainMbCase, $encoding): string {
            $word = $match[0];

            if (\in_array($word, $skipWords, true)) {
                return $word;
            }

            if ($plainMbCase === false) {
                $lowered = self::strtolower($word, $encoding, false, $lang, $tryToKeepStringLength);

                return self::ucfirst($lowered, $encoding, false, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper(\mb_substr($word, 0, 1))
                       . \mb_strtolower(\mb_substr($word, 1));
            }

            return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
                   . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
        };

        // every run of non-whitespace characters counts as one word
        return (string) \preg_replace_callback('/([\S]+)/u', $titleizeWord, $str);
    }
7667
7668
    /**
     * Returns a trimmed string in proper title case.
     *
     * The string is processed by five regex passes that run one after another:
     * the main pass (keep paths / URLs / e-mails, lower-case "small words",
     * capitalize plain words, keep words with internal caps) followed by four
     * passes that re-capitalize small words in special positions (start of title
     * or sub-sentence, end of title, hyphenated compounds).
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are appended to the built-in small-word list and
     * joined into the regular expressions as-is (they are not quoted).
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // built-in small words (regex fragments) plus the caller supplied ones
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        // alternation of all small words, reused by every pattern below
        $smallWordsRx = \implode('|', $smallWords);
        // optional apostrophe followed by lower-case letters (e.g. "'s")
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // input without any lower-case character is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        // (the "(string)" casts below turn a null result of "preg_replace_callback()" into '')
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * Rebuilds one match; exactly one of the groups 2-5 carries the word.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= static::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            /**
             * Keeps the leading context (group 1) and capitalizes the small word (group 2).
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * Capitalizes the small word (group 1); the lookahead is not part of the match.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * Capitalizes the small word (group 1) that starts a hyphenated compound.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in this pattern contains "…" and not "-", although
        // its inline comment talks about a hyphen - confirm that this is intended
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            /**
             * Keeps the first word plus hyphen (group 1) and capitalizes the small word (group 2).
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7837
7838
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big base-2 number: eight bits
     * per byte, without leading zeros ("0" for an empty or all-NUL string).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        // convert byte by byte: "base_convert()" works on floats internally and
        // silently loses precision once the input is longer than a few bytes
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \str_pad(\decbin(\ord($str[$i])), 8, '0', \STR_PAD_LEFT);
        }

        // same notation as "base_convert()": no leading zeros, but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7851
7852
    /**
     * Split a string into its lines.
     *
     * "\r\n", "\r" and "\n" each count as exactly one line break, so blank lines
     * are kept as empty strings unless $removeEmptyValues is true.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // match one line break at a time; a "[\r\n]{1,2}" class would swallow the
        // blank line in "a\n\nb" while keeping it in "a\r\n\r\nb"
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $return = \mb_split('\r\n|\r|\n', $str);
        } else {
            $return = \preg_split('/\r\n|\r|\n/u', $str);
        }

        // splitting failed: behave like for an empty input
        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        // no filtering requested
        if (
            $removeShortValues === null
            &&
            $removeEmptyValues === false
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );
    }
7890
7891
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result contains
     * the words as well as the text between them.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // result for "nothing to split" (empty input or failed split)
        $emptyResult = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $emptyResult;
        }

        $wordChars = self::rxClass($charList, '\pL');

        // a word: word characters, optionally joined by dashes / apostrophes
        $pieces = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $emptyResult;
        }

        $needsReduction = $removeEmptyValues === true || $removeShortValues !== null;
        if (!$needsReduction) {
            return $pieces;
        }

        // make sure that every remaining entry is a string (keys are kept)
        return \array_map(
            static function ($piece): string {
                return (string) $piece;
            },
            self::reduce_string_array($pieces, $removeEmptyValues, $removeShortValues)
        );
    }
7938
7939
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are handed over unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7954
7955
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If no room is left for any part of $str (the $length is not larger than
     * the length of $substring, or not positive at all), only $substring is
     * returned - the same way "str_truncate_safe()" handles it.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough already: nothing to cut, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the appended substring
            $length -= (int) \mb_strlen($substring);

            // a negative length would make "mb_substr()" cut from the END of the
            // string and return more than the requested number of characters
            if ($length <= 0) {
                return $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        $length -= (int) self::strlen($substring, $encoding);

        // same guard as in the UTF-8 branch
        if ($length <= 0) {
            return $substring;
        }

        return (
            (string) self::substr(
                $str,
                0,
                $length,
                $encoding
            )
        ) . $substring;
    }
8012
8013
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * For an empty $str, a non-positive $length or a $length that leaves no room
     * besides the $substring, only the $substring is returned.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // reserve room for the appended substring
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return '';
            }

            // a space exactly at index $length means the cut did not split a word
            $spaceAtCut = self::strpos($str, ' ', $length - 1, $encoding);
            if ($spaceAtCut !== $length) {
                // otherwise fall back to the last space inside the kept part
                $lastSpace = self::strrpos($head, ' ', 0, $encoding);

                $cutAtLastSpace = $lastSpace !== false
                                  || ($spaceAtCut !== false && $ignoreDoNotSplitWordsForOneWord === false);

                if ($cutAtLastSpace) {
                    $head = (string) self::substr($head, 0, (int) $lastSpace, $encoding);
                }
            }

            return $head . $substring;
        }

        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve room for the appended substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // a space exactly at index $length means the cut did not split a word
        $spaceAtCut = \mb_strpos($str, ' ', $length - 1);
        if ($spaceAtCut !== $length) {
            // otherwise fall back to the last space inside the kept part
            $lastSpace = \mb_strrpos($head, ' ', 0);

            $cutAtLastSpace = $lastSpace !== false
                              || ($spaceAtCut !== false && $ignoreDoNotSplitWordsForOneWord === false);

            if ($cutAtLastSpace) {
                $head = (string) \mb_substr($head, 0, (int) $lastSpace);
            }
        }

        return $head . $substring;
    }
8107
8108
    /**
     * Returns the string in its underscore-delimited form.
     *
     * This is "UTF8::str_delimit()" with "_" as the delimiter; the exact
     * conversion rules (lower-casing, trimming, handling of uppercase
     * characters, spaces and dashes) are the ones implemented there.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8122
8123
    /**
     * Builds the UpperCamelCase variant of a string.
     *
     * The string is first camelized via UTF8::str_camelize() and the first
     * character of that result is then upper-cased via UTF8::ucfirst().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // step 1: lowerCamelCase (only the encoding is forwarded to this step)
        $camelized = self::str_camelize($str, $encoding);

        // step 2: upper-case the leading character
        return self::ucfirst($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }
8145
8146
    /**
     * Alias of UTF8::ucfirst(): all arguments are forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Forwarded to UTF8::ucfirst().</p>
     * @param string|null $lang                  [optional] <p>Forwarded to UTF8::ucfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Forwarded to UTF8::ucfirst().</p>
     *
     * @return string the result of UTF8::ucfirst()
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
8168
8169
    /**
     * Counts (or lists) the words of a UTF-8 string.
     *
     * The string is split via UTF8::str_to_words(); the words are read from
     * the odd indexes of that result (the even indexes presumably hold the
     * text between the words - verify against UTF8::str_to_words()).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @return int|string[] the number of words, or the list of words (depends on $format)
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            for ($idx = 1; $idx < $partCount; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: words keyed by their character offset in the input
        if ($format === 2) {
            $wordsByOffset = [];

            // the first word starts right after the leading part
            $position = (int) self::strlen($parts[0]);
            for ($idx = 1; $idx < $partCount; $idx += 2) {
                $word = $parts[$idx];
                $wordsByOffset[$position] = $word;

                // skip the word itself and the part that follows it
                $position += (int) self::strlen($word) + (int) self::strlen($parts[$idx + 1]);
            }

            return $wordsByOffset;
        }

        // every other format: only the number of words
        return (int) (($partCount - 1) / 2);
    }
8206
8207
    /**
     * Compares two strings without regard to case.
     *
     * INFO: both strings are case-folded via UTF8::strtocasefold() and then
     *       handed over to UTF8::strcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8228
8229
    /**
     * Alias of UTF8::strstr(): all arguments are forwarded unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Forwarded to UTF8::strstr().</p>
     * @param string $encoding      [optional] <p>Forwarded to UTF8::strstr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::strstr().</p>
     *
     * @return false|string the result of UTF8::strstr()
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8251
8252
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings are reported as equal right away. Otherwise both
     * strings are normalized to NFD before they are compared byte-wise, so
     * that canonically equivalent sequences (e.g. precomposed vs. decomposed
     * accents) compare as equal.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false for input it cannot process
        // (e.g. invalid UTF-8); with strict types that false would make
        // "\strcmp()" throw a TypeError, so fall back to the raw string.
        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }
8270
8271
    /**
     * Returns the length of the initial segment of a string that contains
     * none of the characters from the given list.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $charList <p>The characters that end the segment.</p>
     * @param int    $offset   [optional] <p>Start of the part of $str that is examined.</p>
     * @param int    $length   [optional] <p>Length of the part of $str that is examined.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int the length of the segment, 0 if the (sub-)string is empty or cannot be extracted
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // nothing can stop the segment: it spans the whole string
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        // restrict the search to the requested part of the string
        $needsSlice = $offset !== null || $length !== null;
        if ($needsSlice) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, $start, $length, $encoding);
            } elseif ($length !== null) {
                $slice = \mb_substr($str, $start, $length);
            } else {
                $slice = \mb_substr($str, $start);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything in front of the first listed character
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            // no listed character at all: the segment is the whole string
            return (int) self::strlen($str, $encoding);
        }

        $prefixLength = self::strlen($found[1], $encoding);

        return $prefixLength === false ? 0 : $prefixLength;
    }
8332
8333
    /**
     * Alias of UTF8::stristr(): all arguments are forwarded unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Forwarded to UTF8::stristr().</p>
     * @param string $encoding      [optional] <p>Forwarded to UTF8::stristr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::stristr().</p>
     *
     * @return false|string the result of UTF8::stristr()
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8355
8356
    /**
     * Builds a string out of a list of code points.
     *
     * Every element is converted via UTF8::chr() and the results are joined
     * without a separator.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $converter = [self::class, 'chr'];
        $chars = \array_map($converter, $array);

        return \implode('', $chars);
    }
8378
8379
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is compared byte-wise against every BOM that is registered
     * as a key of self::$BOM.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys are needed, so iterate by value: a by-reference
        // "foreach" over the shared static table would turn its elements
        // into references for no benefit.
        foreach (\array_keys(self::$BOM) as $bomString) {
            $bomString = (string) $bomString;

            // prefix comparison on bytes; no need to scan the whole string
            if (\strncmp($str, $bomString, self::strlen_in_byte($bomString)) === 0) {
                return true;
            }
        }

        return false;
    }
8399
8400
    /**
     * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               Tags which should not be stripped.
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // only pass the second argument on when the caller supplied one
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
8436
8437
    /**
     * Removes every whitespace character from a string. This includes tabs
     * and newline characters, as well as multibyte whitespace such as the
     * thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string without whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        // "preg_replace()" yields null on error, the cast turns that into ''
        return (string) $stripped;
    }
8454
8455
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * Uses "mb_stripos()" when mbstring is available; otherwise it tries
     * "grapheme_stripos()" (UTF-8 with a non-negative offset only), then
     * plain "stripos()" for pure ASCII input and finally a case-folded
     * UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift the
            // position reported by "mb_strpos()" and "iconv_strpos()"
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $canUseGrapheme = $encoding === 'UTF-8' // "grapheme_stripos()" can't handle other encodings
                          &&
                          $offset >= 0 // "grapheme_stripos()" can't handle negative offset
                          &&
                          self::$SUPPORT['intl'] === true;
        if ($canUseGrapheme) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8530
8531
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end,
     * the needle is matched case-insensitively.
     *
     * Uses "mb_stristr()" when mbstring is available; otherwise it tries
     * "grapheme_stristr()" (UTF-8 only), then plain "stristr()" for pure
     * ASCII input and finally a regular expression.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if the cleaning removed all of it.
        // Compare strictly: a loose "!$needle" would also treat the perfectly
        // valid needle "0" as empty and return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture (non-greedy) everything in front
        // of the first case-insensitive occurrence of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // the result starts where the captured prefix ends
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8614
8615
    /**
     * Get the string length, not the byte-length!
     *
     * Uses "mb_strlen()" when mbstring is available; otherwise it falls back
     * to "strlen()" (CP850 / ASCII), "iconv_strlen()", "grapheme_strlen()"
     * (UTF-8 only), "strlen()" for pure ASCII input and finally a regular
     * expression that matches character by character.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters would distort the result of
            // "mb_strlen()" and "iconv_strlen()"
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        // warn about encodings that none of the remaining fallbacks can handle
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: one regex match per character
        //

        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        // no match at all for a non-empty string is reported as false
        return $charCount === 0 ? false : $charCount;
    }
8729
8730
    /**
     * Get string length in byte.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int the number of bytes
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // With the mbstring function overload in place "mb_" is available,
        // so ask it for the length in the 8-bit encoding "CP850".
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
8750
8751
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: both strings are case-folded via UTF8::strtocasefold() and then
     *       handed over to UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8772
8773
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: identical strings are equal right away, otherwise both strings
     *       are passed through UTF8::strtonatfold() before "strnatcmp()".
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
8792
8793
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * INFO: both strings are case-folded via UTF8::strtocasefold() and then
     *       handed over to UTF8::strncmp().
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // note: the encoding is not forwarded to the comparison itself
        return self::strncmp($folded1, $folded2, $len);
    }
8820
8821
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut down to their first $len characters and the two
     * heads are then compared via UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            // UTF-8 goes straight to mbstring
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head1, $head2);
    }
8856
8857
    /**
     * Search a string for any of a set of characters.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // find the first character of the haystack that is part of the list
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // byte position of that character, the result starts right there
        $bytePos = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $bytePos);
    }
8879
8880
    /**
8881
     * Find position of first occurrence of string in a string.
8882
     *
8883
     * @see http://php.net/manual/en/function.mb-strpos.php
8884
     *
8885
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
8886
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
8887
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
8888
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8889
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8890
     *
8891
     * @return false|int
8892
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
8893
     *                   string.<br> If needle is not found it returns false.
8894
     */
8895
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // nothing can be found inside an empty haystack
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle,
        // so an integer is converted via "self::chr()" first
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // a "false" result is not returned here, the remaining fallbacks are tried instead
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Character index (relative to the full haystack) at which the searched
        // part starts. A negative offset counts from the end of the haystack, so
        // it is translated into the matching absolute index; otherwise the result
        // would be relative to the truncated haystack instead of the original one.
        $startIndex = $offset;
        if ($startIndex < 0) {
            $startIndex += (int) self::strlen($haystack, $encoding);
            if ($startIndex < 0) {
                // the offset reaches back beyond the first character
                $startIndex = 0;
            }
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position of the needle inside the (already shortened) haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos === 0) {
            return $startIndex;
        }

        // convert the byte position into a character position
        return $startIndex + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
    }
9027
9028
    /**
     * Find the position of the first occurrence of a string in a string (byte-wise variant).
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to look for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available whenever the function overload is active,
        // in that case it is called with an 8-bit charset ("CP850")
        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        return $overloadActive
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9057
9058
    /**
     * Returns the portion of haystack that is delimited by the last occurrence of needle.
     *
     * INFO: On the fallback paths without mbstring (iconv / vanilla php) only the
     *       first character of needle is used for the search.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return everything from the beginning of haystack
     *                              up to the last occurrence of needle.<br>
     *                              <b>false</b>: return everything from the last occurrence of needle
     *                              to the end of haystack.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     *                      (also false if haystack or needle is empty)
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (!$before_needle && \in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrchr($haystack, $needle);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both remaining fallbacks search for the first character of the needle only
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9180
9181
    /**
     * Returns the given string with its characters in reverse order.
     *
     * INFO: The string is passed through "self::emoji_encode()" before and through
     *       "self::emoji_decode()" after the reversal (presumably to keep emoji intact).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9233
9234
    /**
     * Case-insensitive variant of "strrchr()": returns the portion of haystack that
     * is delimited by the last occurrence of needle.
     *
     * INFO: On the fallback path without mbstring only the first character of needle
     *       is used for the search.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return everything from the beginning of haystack
     *                              up to the last occurrence of needle.<br>
     *                              <b>false</b>: return everything from the last occurrence of needle
     *                              to the end of haystack.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is searched for
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9309
9310
    /**
     * Find the position of the last occurrence of a string in a string, ignoring the case.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for. A non-negative int is converted via "UTF8::chr()".</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $isCodePoint = (int) $needle === $needle && $needle >= 0;
        if ($isCodePoint) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            self::$SUPPORT['intl'] === true
            &&
            $offset >= 0 // grapheme_strripos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        ) {
            // a "false" result is not returned here, the remaining fallbacks are tried instead
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, then do a case-sensitive search
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
9420
9421
    /**
     * Finds the position of the last occurrence of a string within another,
     * case insensitive (byte-wise variant).
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found
     *                   (also false if haystack or needle is empty)
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available whenever the function overload is active,
        // in that case it is called with an 8-bit charset ("CP850")
        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        return $overloadActive
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9452
9453
    /**
     * Find position of last occurrence of a string in a string.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // INFO: the haystack was already checked above
        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // a "false" result is not returned here, the remaining fallbacks are tried instead
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Shorten the haystack according to the offset. The encoding is forwarded,
        // so that the characters are counted in the same charset as everywhere else
        // in this method (and in the same way as in "UTF8::strpos()").
        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters, the result is shifted back below
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // ignore the last abs($offset) characters, positions stay unchanged
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position of the needle inside the (maybe shortened) haystack
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // convert the byte position into a character position
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9590
9591
    /**
     * Find the position of the last occurrence of a string in a string (byte-wise variant).
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available whenever the function overload is active,
        // in that case it is called with an 8-bit charset ("CP850")
        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        return $overloadActive
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9621
9622
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start of the examined part of the string.</p>
     * @param int    $length   [optional] <p>Length of the examined part of the string.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return int
     *             The length of the matching initial segment, 0 if nothing matches
     *             or if the (sliced) string or the mask is empty
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut out the requested part of the string, if an offset or a length was given
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest prefix that consists of mask characters only
        $prefixPattern = '/^' . self::rxClass($mask) . '+/u';
        $found = [];
        if (!\preg_match($prefixPattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9665
9666
    /**
     * Returns the part of the haystack string starting at the first occurrence of needle
     * (or, on request, the part in front of it).
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also false if haystack or needle is empty)
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            self::$SUPPORT['intl'] === true
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        ) {
            // a "false" result is not returned here, the remaining fallbacks are tried instead
            $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($graphemeResult !== false) {
                return $graphemeResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $prefix = $found[1];
        if ($before_needle) {
            return $prefix;
        }

        // skip the captured prefix and return the rest of the haystack
        return self::substr($haystack, (int) self::strlen($prefix));
    }
9774
9775
    /**
     * Finds the first occurrence of a string within another (byte-wise variant).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end,
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found
     *                      (also false if haystack or needle is empty)
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available whenever the function overload is active,
        // in that case it is called with an 8-bit charset ("CP850")
        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        return $overloadActive
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
9810
9811
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through UTF8::fixStrCaseHelper() and afterwards converted
     * to lowercase or uppercase, depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>
     *                               <b>true</b>, return a lowercase string (default)<br>
     *                               anything else, return an uppercase string<br>
     *                               PS: uppercase is for some languages better ...
     *                               </p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // only a strict boolean "true" selects the lowercase variant
        $toLower = ($lower === true);

        // default case (UTF-8, no special language): convert directly via "mb_*"
        if ($lang === null && $encoding === 'UTF-8') {
            return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // everything else is delegated to the encoding- and language-aware helpers
        return $toLower
            ? self::strtolower($str, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($str, $encoding, $cleanUtf8, $lang);
    }
9863
9864
    /**
     * Make a string lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the input is not type-hinted, so normalize it to a string first
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // default case: UTF-8 without a special language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            // the language cannot be honoured without "intl": warn and use the generic conversion
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // language specific conversion via an intl transliterator, e.g. "tr-Lower"
        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            // unknown language: use the language independent transliterator
            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
9929
9930
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length
     *                                           (the string is pre-processed via UTF8::fixStrCaseHelper())</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the input is not type-hinted, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // default case: UTF-8 without a special language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // language specific conversion via an intl transliterator, e.g. "tr-Upper"
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, only the requested language is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // use the language independent transliterator instead
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            // the language cannot be honoured without "intl": warn and use the generic conversion below
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
9995
9996
    /**
     * Translate characters or replace sub-strings.
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the "from" / "to" elements cannot be combined into replacement pairs
     *
     * @return string
     *                <p>
     *                A copy of str with the replacements applied:<br>
     *                - if $to is not an empty string, $from and $to are split via UTF8::str_split() and the
     *                elements are paired by position (the longer list is cut to the length of the shorter one)<br>
     *                - if $to is an empty string and $from is a string, every occurrence of $from is removed<br>
     *                - if $to is an empty string and $from is an array, $from is used as replacement pairs
     *                </p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $from = self::str_split($from);
            $to = self::str_split($to);
            $countFrom = \count($from);
            $countTo = \count($to);

            // "array_combine()" needs the same number of keys and values
            if ($countFrom > $countTo) {
                $from = \array_slice($from, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $to = \array_slice($to, 0, $countFrom);
            }

            // keep $from untouched until the result is checked, so that the
            // exception message can still show the elements that failed
            $pairs = \array_combine($from, $to);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        // $from is only still a string when $to was empty: remove the sub-string
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10043
10044
    /**
     * Return the width of a string.
     *
     * With mbstring the result comes from "mb_strwidth()". Without it, every character
     * matched by the wide-character regex below counts as 2 and every other character as 1.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the regex below uses the "u" modifier, so convert the input to UTF-8 first
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and let "preg_replace()" count how many were removed
        $wideChars = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideChars);

        // wide characters count twice, the remaining characters once
        return ($wideChars * 2) + (int) self::strlen($remaining, 'UTF-8');
    }
10094
10095
    /**
     * Get part of a string.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str       <p>The string being checked.</p>
     * @param int      $offset    <p>The first position used in str.</p>
     * @param int|null $length    [optional] <p>The maximum length of the returned string,
     *                            <b>null</b> means "up to the end of the string".</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty input
     *                      or a length of 0. <b>FALSE</b> is returned if the string length cannot be
     *                      determined in the non-mbstring fallback, or if the underlying
     *                      "mb_substr()" / "substr()" call returns it.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // Delegate to "mb_substr()" with the explicit encoding: calling self::substr()
            // here would re-enter this same branch with identical arguments and never
            // terminate. A null length means "up to the end of the string".
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string (a length of 0 was already handled above, so only null is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        // no explicit length: take everything up to the end of the string
        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10255
10256
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * If an offset or a length is given, $str1 is cut to that range first and $str2 is cut
     * to the resulting length of $str1, then both parts are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison, <b>null</b> means
     *                                     "up to the end of the main string".</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        $needsSlicing = ($offset !== 0 || $length !== null);

        if ($needsSlicing) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
            } else {
                $str1 = $length === null
                    ? (string) \mb_substr($str1, $offset)
                    : (string) \mb_substr($str1, $offset, $length);

                // compare only as many characters as are left in the main string
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            }
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
10309
10310
    /**
     * Count the number of substring occurrences.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences, or <b>false</b> if the haystack or the needle
     *                   is empty or if the length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // nothing to search in
        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested range before counting
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }

                $length = (int) $haystackLength;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: with "PREG_SET_ORDER" there is one entry per match
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $occurrences, \PREG_SET_ORDER);

        return \count($occurrences);
    }
10391
10392
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The offset where to start counting.</p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @return false|int
     *                   <p>The number of times the needle occurs in the haystack, 0 if the
     *                   haystack or the needle is empty. With "mbstring.func_overload" active,
     *                   <b>false</b> is returned if the haystack length cannot be determined or
     *                   for the offset / length combination that is rejected before PHP 7.1.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // no function overloading: the native function can be used directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // with overloading, cut the haystack to the requested range first
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }

                $length = (int) $haystackLength;
            }

            $isRejectedRange = $length !== 0
                               &&
                               $offset !== 0
                               &&
                               ($length + $offset) <= 0
                               &&
                               Bootup::is_php('7.1') === false; // output from "substr_count()" have changed in PHP 7.1
            if ($isRejectedRange) {
                return false;
            }

            $slice = \substr($haystack, $offset, $length);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
10469
10470
    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $caseSensitive to false.
     *
     * For the case-insensitive comparison both strings are converted to uppercase
     * before the occurrences are counted.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>The number of occurrences, 0 if the string or the substring is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$caseSensitive) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            // the last argument (false) selects the uppercase variant of the case folding
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10516
10517
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        // no matching prefix: nothing to remove
        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10541
10542
    /**
     * Get part of a string, working on bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string,
     *                         <b>null</b> means "up to the end of the string".</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      input or a length of 0. <b>FALSE</b> is only returned if the underlying
     *                      "substr()" / "mb_substr()" call returns it.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // Omit the length argument instead of passing a fixed upper bound:
        // a hard-coded 2147483647 would cut off larger strings on 64-bit builds.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10573
10574
    /**
     * Strips a trailing suffix ($needle) from the string ($haystack), ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not
     *                end with the suffix (or if one of both strings is empty).</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything except the last "length of the suffix" characters
        $charsToKeep = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $charsToKeep);
    }
10598
10599
    /**
     * Strips a leading prefix ($needle) from the string ($haystack), case sensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not
     *                start with the prefix (or if one of both strings is empty).</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // cut off as many characters as the prefix is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10623
10624
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given (and $str is a string), the replacing
     *                                     ends at the end of string. Of course, if length is zero then this
     *                                     function will have the effect of inserting replacement into string at
     *                                     the given start offset.<br><br>
     *                                     If $str is an array and no length is given, a length of 0 is used for
     *                                     every element.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not an array
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: non-integer entries fall back to the number of input strings
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call (element by element): the closure forwards "$encoding",
            // otherwise every element would silently be processed with the default charset
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single input string can only use a single replacement: take the first one
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // normalize the offset into the range 0 ... $string_length
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // normalize the length: negative === "stop x chars before the end", null === "until the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part before the offset + replacement + part behind the replaced range
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php: split both strings into characters and splice the arrays
        //

        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10774
10775
    /**
     * Strips a trailing suffix ($needle) from the string ($haystack), case sensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not
     *                end with the suffix (or if one of both strings is empty).</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise comparison of the end of the haystack with the needle
        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if ($endsWithNeedle === false) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_" functions directly
            return (string) \mb_substr($haystack, 0, (int) \mb_strlen($haystack) - (int) \mb_strlen($needle));
        }

        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
10816
10817
    /**
     * Returns a case swapped version of the string.
     *
     * The result is built by XOR-ing the lower-cased string, the upper-cased string and the
     * input string byte by byte.
     *
     * NOTE(review): the byte-wise XOR presumably only works as intended if the lower- and the
     * upper-cased variants have the same byte length as the input - TODO confirm for chars like "ß".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_" functions directly
            $lowerCased = \mb_strtolower($str);
            $upperCased = \mb_strtoupper($str);
        } else {
            $lowerCased = self::strtolower($str, $encoding);
            $upperCased = self::strtoupper($str, $encoding);
        }

        return (string) ($lowerCased ^ $upperCased ^ $str);
    }
10844
10845
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed to be in use if a function of an extension ("mb_strlen" for
     * "mbstring", "iconv" for "iconv") exists although the extension itself is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
10868
10869
    /**
     * Replaces every tab character in the string with a fixed number of spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that replace one tab.</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indentation = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indentation, $str);
    }
10887
10888
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without "$tryToKeepStringLength" the work is done by
     * "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules or length preservation requested: let "str_titleize()" do the work
        $needsSpecialHandling = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsSpecialHandling) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
    }
10925
10926
    /**
10927
     * Alias for "UTF8::to_ascii()": forwards all arguments unchanged and returns its result.
10928
     *
10929
     * @see        UTF8::to_ascii()
10930
     *
10931
     * @param string $str       <p>The input string.</p>
10932
     * @param string $subst_chr <p>Passed on as the second ("$unknown") argument of "UTF8::to_ascii()".</p>
10933
     * @param bool   $strict    <p>Passed on as the third ("$strict") argument of "UTF8::to_ascii()".</p>
10934
     *
10935
     * @return string
10936
     *
10937
     * @deprecated <p>use "UTF8::to_ascii()"</p>
10938
     */
10939
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
10940
    {
10941 7
        return self::to_ascii($str, $subst_chr, $strict);
10942
    }
10943
10944
    /**
10945
     * Alias for "UTF8::to_iso8859()": forwards the argument unchanged and returns its result.
10946
     *
10947
     * @see        UTF8::to_iso8859()
10948
     *
10949
     * @param string|string[] $str <p>A string or an array of strings.</p>
10950
     *
10951
     * @return string|string[]
10952
     *
10953
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
10954
     */
10955
    public static function toIso8859($str)
10956
    {
10957 2
        return self::to_iso8859($str);
10958
    }
10959
10960
    /**
10961
     * Alias for "UTF8::to_latin1()": forwards the argument unchanged and returns its result.
10962
     *
10963
     * @see        UTF8::to_latin1()
10964
     *
10965
     * @param string|string[] $str <p>A string or an array of strings.</p>
10966
     *
10967
     * @return string|string[]
10968
     *
10969
     * @deprecated <p>use "UTF8::to_latin1()"</p>
10970
     */
10971
    public static function toLatin1($str)
10972
    {
10973 2
        return self::to_latin1($str);
10974
    }
10975
10976
    /**
10977
     * Alias for "UTF8::to_utf8()": forwards the argument unchanged and returns its result.
10978
     *
10979
     * INFO: only "$str" is passed on, so "UTF8::to_utf8()" runs with its default for html-entity decoding.
10980
     *
10981
     * @see        UTF8::to_utf8()
10982
     *
10983
     * @param string|string[] $str <p>Any string or array.</p>
10984
     *
10985
     * @return string|string[]
10986
     *
10987
     * @deprecated <p>use "UTF8::to_utf8()"</p>
10988
     */
10989
    public static function toUTF8($str)
10990
    {
10991 2
        return self::to_utf8($str);
10992
    }
10991
10992
    /**
     * Convert a string into ASCII.
     *
     * The string is returned as it is if it only contains ASCII. Otherwise it is cleaned via
     * "UTF8::clean()", optionally transliterated via PHP-Intl and finally every remaining
     * non-ASCII character is looked up in the "x??" data files (one file per 256 code points).
     * Characters without a mapping are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // cache for the already loaded mapping tables: bank (code point >> 8) => [low byte => ascii]
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        // lazy-load the "byte => ordinal" lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into single characters
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            // reset the code point for every character, so that a value from a
            // previous iteration can never be re-used for the current character
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // 2 bytes
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3 bytes
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4 bytes
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        // 5 bytes
                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            // 6 bytes
                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            if ($ordC0 === 254 || $ordC0 === 255) {
                $c = $unknown;

                continue;
            }

            // no code point could be decoded for this character
            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // the mapping tables are split into banks of 256 code points: load the bank on demand
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            // position of the code point within its bank
            $newchar = $ord & 255;

            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {
                $c = $unknown;
            }
        }
        // break the reference to the last element
        unset($c);

        return \implode('', $chars);
    }
11152
11153
    /**
     * Converts a value into a boolean, based on its string representation.
     *
     * - empty string => false
     * - "true", "1", "on", "yes" (exact or in any letter case) => true
     * - "false", "0", "off", "no" (exact or in any letter case) => false
     * - other numeric strings => true if the number is greater than 0
     * - everything else => boolean cast of the trimmed string
     *
     * @param mixed $str <p>The value, it is cast to a string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $knownValues = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // 1) exact match
        $exactMatch = $knownValues[$value] ?? null;
        if ($exactMatch !== null) {
            return $exactMatch;
        }

        // 2) case insensitive match
        $caseInsensitiveMatch = $knownValues[\strtolower($value)] ?? null;
        if ($caseInsensitiveMatch !== null) {
            return $caseInsensitiveMatch;
        }

        // 3) numbers: only positive numbers are "true"
        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        // 4) anything else: PHP's own string to bool conversion of the trimmed value
        return (bool) \trim($value);
    }
11194
11195
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Only the characters "a-z", "A-Z", "0-9", ".", "-", whitespace and the fallback char survive,
     * then whitespace is replaced by the fallback char and repeated fallback chars are collapsed.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply removed.
     * @param string $fallback_char     <p>The char that replaces whitespace (and is handed over to
     *                                  "UTF8::str_transliterate()" if transliteration is used).</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $quotedFallbackChar = \preg_quote($fallback_char, '/');

        // the patterns are applied one after another, each with the replacement at the same position
        $patterns = [
            '/[^' . $quotedFallbackChar . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                         // 2) convert spaces to $fallback_char
            '/[' . $quotedFallbackChar . ']+/',                // 3) remove double $fallback_char's
        ];
        $replacements = [
            '',
            $fallback_char,
            $fallback_char,
        ];

        $filename = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($filename, $fallback_char);
    }
11230
11231
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), the keys are kept.
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11255
11256
    /**
11257
     * Alias for "UTF8::to_iso8859()": forwards the argument unchanged and returns its result.
11258
     *
11259
     * @see UTF8::to_iso8859()
11260
     *
11261
     * @param string|string[] $str <p>A string or an array of strings.</p>
11262
     *
11263
     * @return string|string[]
11264
     */
11265
    public static function to_latin1($str)
11266
    {
11267 2
        return self::to_iso8859($str);
11268
    }
11269
11270
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX", incl. surrogate pairs) and optionally html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays are converted element by element (recursively)
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $byteCount = \strlen($str);
        $buf = '';

        for ($i = 0; $i < $byteCount; ++$i) {
            $leadByte = $str[$i];

            if ($leadByte < "\xC0") {
                // a lonely continuation byte (10xxxxxx) needs conversion, everything else is kept as it is
                $buf .= ($leadByte & "\xC0") === "\x80" ? self::to_utf8_convert_helper($leadByte) : $leadByte;

                continue;
            }

            // how many continuation bytes does the lead byte announce? (0 === doesn't look like UTF8)
            if ($leadByte <= "\xDF") {
                $expectedContinuationBytes = 1;
            } elseif ($leadByte <= "\xEF") {
                $expectedContinuationBytes = 2;
            } elseif ($leadByte <= "\xF7") {
                $expectedContinuationBytes = 3;
            } else {
                $expectedContinuationBytes = 0;
            }

            // collect the sequence, as long as every following byte is a continuation byte
            $sequence = $leadByte;
            for ($j = 1; $j <= $expectedContinuationBytes; ++$j) {
                // bytes behind the end of the string are treated like "\x00"
                $nextByte = $i + $j >= $byteCount ? "\x00" : $str[$i + $j];

                if ($nextByte < "\x80" || $nextByte > "\xBF") {
                    $sequence = null;

                    break;
                }

                $sequence .= $nextByte;
            }

            if ($expectedContinuationBytes === 0 || $sequence === null) {
                // not valid UTF8 - convert the lead byte only, the following bytes are checked on their own
                $buf .= self::to_utf8_convert_helper($leadByte);

                continue;
            }

            // yeah, almost sure it's UTF8 already: keep the sequence and skip its continuation bytes
            $buf .= $sequence;
            $i += $expectedContinuationBytes;
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single escape sequence
                    $codePoint = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $codePoint = ((int) \hexdec($matches[1]) << 10)
                                 + (int) \hexdec($matches[2])
                                 + 0x10000
                                 - (0xD800 << 10)
                                 - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($codePoint < 0x80) {
                    return (string) self::chr($codePoint);
                }

                if ($codePoint < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($codePoint >> 6)) . (string) self::chr(0x80 | ($codePoint & 0x3F));
                }

                return self::decimal_to_chr($codePoint);
            },
            $buf
        );

        // "preg_replace_callback()" returns null on error
        if ($buf === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decodeHtmlEntityToUtf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
11403
11404
    /**
     * Strip whitespace (or other characters) from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than the native "trim()".
     *
     * The native function would only be safe for 7-bit (ASCII) strings / chars, but
     * checking for ASCII first costs more time than it would save here.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars [optional] <p>Characters to be stripped; null or an empty string strips
     *                           whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly against null / '': a plain truthiness check would treat the
        // character list "0" as "not given" and strip whitespace instead of zeros.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\s]+|[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11437
11438
    /**
     * Uppercase the first character of a string and leave the rest untouched.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // Without a language or length constraint the plain mbstring upper-casing is enough.
        $plainUppercase = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $first = (string) \mb_substr($str, 0, 1);
            $rest = (string) \mb_substr($str, 1);

            if ($plainUppercase) {
                return \mb_strtoupper($first) . $rest;
            }

            return self::strtoupper($first, $encoding, false, $lang, $tryToKeepStringLength) . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($plainUppercase) {
            return \mb_strtoupper((string) \mb_substr($str, 0, 1, $encoding), $encoding) . $rest;
        }

        return self::strtoupper(
            (string) self::substr($str, 0, 1, $encoding),
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        ) . $rest;
    }
11507
11508
    /**
     * Alias of "UTF8::ucfirst()": uppercases the first character of the string.
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Charset that is passed on to "UTF8::ucfirst()".</p>
     * @param bool   $cleanUtf8 [optional] <p>Cleaning flag that is passed on to "UTF8::ucfirst()".</p>
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
11523
11524
    /**
     * Uppercase the first character of every word in the string.
     *
     * Unlike "mb_convert_case($str, MB_CASE_TITLE)" the remaining characters of a word
     * are left as they are (MB_CASE_TITLE would also lowercase them).
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that must not be changed.</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // Strict comparison: a truthiness check would turn the input "0" into "".
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // The native function is only usable when neither extra word-chars nor
        // exceptions were requested (again compared strictly because of "0").
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // Iterate by key instead of by reference so no dangling reference is left behind.
        foreach ($words as $index => $word) {
            if ($word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::ucfirst($word, $encoding);
            }
        }

        return \implode('', $words);
    }
11585
11586
    /**
     * Decode url-encoded input and html entities, and fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: decode as often as possible (until the string no longer changes);
     *                             false: run exactly one decoding pass.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Rewrite non-standard "%uXXXX" escapes into numeric html entities,
        // so the entity decoding below can resolve them.
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always decode at least once; previously "$multi_decode === false" skipped
        // the decoding completely and returned the (almost) untouched input.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
11638
11639
    /**
     * Returns a map of url-encoded WINDOWS-1252 bytes ("%20" ... "%FF") to their UTF-8 characters.
     *
     * The table is built once and cached. Bytes that are undefined in WINDOWS-1252
     * (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to an empty string.
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[]
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        static $TABLE = null;

        if ($TABLE !== null) {
            return $TABLE;
        }

        // 0x80-0x9F is the only range in which WINDOWS-1252 differs from ISO-8859-1.
        // NOTE: 0x80 is the euro sign (it was wrongly listed as a backtick before).
        $win1252Specials = [
            0x80 => '€',
            0x81 => '', // undefined in WINDOWS-1252
            0x82 => '‚',
            0x83 => 'ƒ',
            0x84 => '„',
            0x85 => '…',
            0x86 => '†',
            0x87 => '‡',
            0x88 => 'ˆ',
            0x89 => '‰',
            0x8A => 'Š',
            0x8B => '‹',
            0x8C => 'Œ',
            0x8D => '', // undefined in WINDOWS-1252
            0x8E => 'Ž',
            0x8F => '', // undefined in WINDOWS-1252
            0x90 => '', // undefined in WINDOWS-1252
            0x91 => '‘',
            0x92 => '’',
            0x93 => '“',
            0x94 => '”',
            0x95 => '•',
            0x96 => '–',
            0x97 => '—',
            0x98 => '˜',
            0x99 => '™',
            0x9A => 'š',
            0x9B => '›',
            0x9C => 'œ',
            0x9D => '', // undefined in WINDOWS-1252
            0x9E => 'ž',
            0x9F => 'Ÿ',
        ];

        $table = [];
        for ($byte = 0x20; $byte <= 0xFF; ++$byte) {
            if ($byte < 0x80) {
                // 7-bit ASCII (incl. DEL, 0x7F) is identical in UTF-8
                $char = \chr($byte);
            } elseif ($byte < 0xA0) {
                $char = $win1252Specials[$byte];
            } elseif ($byte < 0xC0) {
                // U+00A0 - U+00BF => 0xC2 0xA0 - 0xC2 0xBF (incl. NBSP and soft hyphen)
                $char = "\xC2" . \chr($byte);
            } else {
                // U+00C0 - U+00FF => 0xC3 0x80 - 0xC3 0xBF
                $char = "\xC3" . \chr($byte - 0x40);
            }

            $table[\sprintf('%%%02X', $byte)] = $char;
        }

        $TABLE = $table;

        return $TABLE;
    }
11875
11876
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Every character that does not fit into a single byte (code point >= 256,
     * or any 3- / 4-byte sequence) is replaced by "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars [optional] <p>If true and the decoded result is not shorter (in characters)
     *                              than the string before the byte-wise decoding, that string is returned
     *                              instead of the decoded one.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        static $UTF8_TO_WIN1252_KEYS_CACHE = null;
        static $UTF8_TO_WIN1252_VALUES_CACHE = null;

        if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        // NOTE(review): the cache names say "UTF8 to WIN1252", but the search list is built from the
        // keys of the WIN1252_TO_UTF8 table - confirm the intended direction against the data file.
        $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';

        // $i reads the input byte-wise, $j writes the (never longer) result into the same string.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the lead byte tells how long the sequence is
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated sequence at the end of the input: there is no
                        // continuation byte that could be read
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $lead = $str[$i];
                    $trail = $str[++$i];
                    $c = (self::$ORD[$lead & "\x1F"] << 6) | self::$ORD[$trail & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one extra byte, then fall through
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: never representable in one byte
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
11959
11960
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * After the native encoding, every key of the "win1252_to_utf8" table that occurs in the
     * result is replaced by its value (skipped if the result contains no 0xC2 byte).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        // without a 0xC2 byte the replacement step is skipped
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        static $WIN1252_TO_UTF8_KEYS_CACHE = null;
        static $WIN1252_TO_UTF8_VALUES_CACHE = null;

        if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $encoded);
    }
12000
12001
    /**
     * Deprecated alias that simply forwards to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12014
12015
    /**
     * Returns the table of all known UTF-8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author Derek E.
     *
     * @return string[]
     *                  <p>The whitespace characters as values, keyed by the type of whitespace
     *                  as defined in the URL above.</p>
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12030
12031
    /**
     * Cut a string after a maximum number of words.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
     * @param string $strAddOn <p>Appended to the result if the string was actually shortened.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, followed by up to $limit "word + trailing whitespace" groups
        $regex = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($regex, $str, $found);

        // nothing matched (e.g. only whitespace or a regex error): keep the input as it is
        if (!isset($found[0])) {
            return $str;
        }

        // the match covers the whole string, so there is nothing to cut off
        $head = $found[0];
        if ((int) \mb_strlen($head) === \mb_strlen($str)) {
            return $str;
        }

        return \rtrim($head) . $strAddOn;
    }
12058
12059
    /**
     * Wraps a string to a given number of characters.
     *
     * The wrapping itself is done by the native "wordwrap()" on a single-byte placeholder
     * string (one placeholder per element returned by "UTF8::str_split()"); the break
     * positions found there are then applied to the real characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $shadow: "?" for every character, " " for spaces, "#" for already existing breaks
        // $chars:  the real characters / breaks, in the same order as in $shadow
        $shadow = '';
        $chars = [];

        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                $chars[] = $break;
                $shadow .= '#';
            }

            unset($segments[$segmentIndex]);

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $shadow .= $char === ' ' ? ' ' : '?';
            }
        }

        $shadow = \wordwrap($shadow, $width, '#', $cut);

        $wrapped = '';
        $charIndex = 0;
        $shadowIndex = -1;
        $breakPos = -1;

        while (false !== $breakPos = \mb_strpos($shadow, '#', $breakPos + 1)) {
            // copy everything in front of the break position
            for (++$shadowIndex; $shadowIndex < $breakPos; ++$shadowIndex) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex++]);
            }

            // the break replaces an original break or a space, so drop that character
            if ($break === $chars[$charIndex] || $chars[$charIndex] === ' ') {
                unset($chars[$charIndex++]);
            }

            $wrapped .= $break;
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
12128
12129
    /**
     * Word-wrap every line of the string on its own.
     *
     * The input is split at "\r\n", "\r" or "\n"; every line is wrapped via the
     * multibyte-aware "UTF8::wordwrap()" (the byte-based native "wordwrap()" would count a
     * multi-byte character as several columns) and terminated by "\n".
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width for every single line.</p>
     *
     * @return string <p>The wrapped lines, each one (including the last) followed by "\n".</p>
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        // "(array) false" becomes "[false]", this is handled in the loop below
        $lines = (array) \preg_split('/\\r\\n|\\r|\\n/', $str);

        $wrapped = '';
        foreach ($lines as $line) {
            if ($line === false) {
                continue;
            }

            $wrapped .= self::wordwrap($line, $limit) . "\n";
        }

        return $wrapped;
    }
12153
12154
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @return string[] <p>The numeric code point as key and the White Space character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12163
12164
    /**
     * Lazily fills the emoji caches (keys, values and the "reversible" placeholder per key).
     *
     * Does nothing if the key cache is already filled.
     *
     * @return void
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the keys by byte-length, longest first
        $byLengthDesc = static function ($left, $right) {
            return \strlen($right) <=> \strlen($left);
        };
        \uksort(self::$EMOJI, $byLengthDesc);

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder per key, built from the crc32 of the key and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }
    }
12190
12191
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * @param string $str
     * @param bool   $useLower     <p>true: replace the "upper" entries by the "lower" ones; otherwise (default)
     *                             replace "lower" by "upper".</p>
     * @param bool   $fullCaseFold <p>Additionally apply the "caseFolding_full" table, not only the common
     *                             cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = $useLower === true;

        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // the full table is loaded on first use only
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        if ($toLower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12232
12233
    /**
     * Includes "/data/<file>.php" (relative to this file) and returns its return value.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return mixed
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12247
12248
    /**
     * Includes "/data/<file>.php" (relative to this file) if that file exists.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed <p>The return value of the included file, or false if the file does not exist.</p>
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12266
12267
    /**
     * Checks whether the mbstring "func_overload" for string functions is active on the server.
     *
     * INFO: The INI directive 'mbstring.func_overload' is deprecated since PHP 7.2.
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        // without the constant there is nothing that could be overloaded
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overload = (int) @\ini_get('mbstring.func_overload');

        return ($overload & \MB_OVERLOAD_STRING) !== 0;
    }
12284
12285
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param array    $strings           <p>The strings to filter.</p>
     * @param bool     $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
     * @param int|null $removeShortValues [optional] <p>Drop values with at most this many characters;
     *                                    null disables the check.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $keep = static function ($value) use ($removeEmptyValues, $removeShortValues): bool {
            $tooShort = $removeShortValues !== null
                        &&
                        \mb_strlen($value) <= $removeShortValues;
            if ($tooShort) {
                return false;
            }

            $blank = $removeEmptyValues === true
                     &&
                     \trim($value) === '';

            return !$blank;
        };

        return \array_values(\array_filter($strings, $keep));
    }
12319
12320
    /**
     * Builds a (cached) regex fragment that matches any element of $s.
     *
     * Every element returned by "UTF8::str_split($s)" is handled as follows:
     * - "-" is put in front of the character class,
     * - elements shorter than 3 bytes are added to the class via "preg_quote()",
     * - longer elements that are exactly one character ("UTF8::strlen()") are added to the class as they are,
     * - everything else becomes an alternative of its own.
     *
     * The result is either "[...]" or, if there are additional alternatives, "(?:[...]|alt|...)".
     *
     * @param string $s     <p>The characters the fragment has to match.</p>
     * @param string $class [optional] <p>Raw content that the character class starts with.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASSS_CACHE = [];

        // The length prefix keeps the key unambiguous: a plain "$s . $class" is the same
        // for e.g. ('x\', 'd') and ('x', '\d'), although both produce different fragments.
        $cacheKey = \strlen($s) . '|' . $s . $class;

        if (isset($RX_CLASSS_CACHE[$cacheKey])) {
            return $RX_CLASSS_CACHE[$cacheKey];
        }

        // index 0 is the character class, every further entry is an alternative
        $parts = [$class];

        // Iterate by value into a separate variable: a function result cannot be
        // used as a reference target, and the parameter $s must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                $parts[0] = '-' . $parts[0];
            } elseif (!isset($char[2])) {
                $parts[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $parts[0] .= $char;
            } else {
                $parts[] = $char;
            }
        }

        if ($parts[0]) {
            $parts[0] = '[' . $parts[0] . ']';
        }

        if (\count($parts) === 1) {
            $return = $parts[0];
        } else {
            $return = '(?:' . \implode('|', $parts) . ')';
        }

        $RX_CLASSS_CACHE[$cacheKey] = $return;

        return $return;
    }
12368
12369
    /**
     * Capitalize the parts of a personal name, e.g. "marcus aurelius" => "Marcus Aurelius".
     *
     * $names is split on $delimiter and every part is passed through self::ucfirst(), except:
     * - parts that are exactly one of the known lowercase particles ("von", "de", "ibn", ...),
     * - parts that start with one of the known prefixes ("mc", "d'", "al-", ...),
     * - and, only when the delimiter is "-", parts that start with one of the particles.
     *
     * NOTE(review): self::ucfirst() is called without $encoding while self::strpos() receives it;
     * confirm whether that is intended.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  <p>Charset that is handed to self::strpos().</p>
     *
     * @return string <p>The re-joined name, or an empty string when $delimiter is empty.</p>
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // \explode() cannot split on an empty delimiter: it returned false (plus a warning) before
        // PHP 8 and throws a ValueError since, so handle that case up front.
        if ($delimiter === '') {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // The set of beginnings that suppress capitalization does not change per name part.
        $protectedBeginnings = $specialCases['prefixes'];
        if ($delimiter === '-') {
            $protectedBeginnings = \array_merge($specialCases['names'], $protectedBeginnings);
        }

        $namesArray = \explode($delimiter, $names);

        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $isProtected = false;
            foreach ($protectedBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $isProtected = true;

                    break;
                }
            }

            if ($isProtected) {
                continue;
            }

            $namesArray[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $namesArray);
    }
12459
12460
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is decomposed with Unicode normalization form D and every run of
     * nonspacing marks (\p{Mn}) is removed afterwards, so that e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>
     *                     The folded string, an empty string when $str cannot be normalized,
     *                     or null when \preg_replace() fails.
     *                     </p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure with false; passing that on to preg_replace() is a
        // TypeError under strict_types on PHP 8 (older versions silently treated it as '').
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12471
12472
    /**
     * Convert a single input value into its UTF-8 representation.
     *
     * The value is looked up in the lazily loaded "ord" table. If the resulting ordinal has an
     * entry in the "win1252_to_utf8" table, that entry is returned. Otherwise a two-byte
     * sequence is composed from the ordinal's upper bits and the lower six bits of the input.
     *
     * NOTE(review): presumably $input is a single byte (or a key of the "ord" table) - confirm
     * against the callers, a missing key is not handled here.
     *
     * @param int|string $input <p>The value to convert; must be a key of the "ord" table.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The bitwise string operators below are intentional (byte-wise masks, not logic).
            // ">> 6" is the integer form of "/ 64": a float array offset with a fractional part
            // is an implicit lossy conversion, which is deprecated since PHP 8.1.
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12505
}
12506