Passed
Push — master ( b2052d...98cca6 )
by Lars
03:19
created

UTF8::stristr()   C

Complexity

Conditions 15
Paths 43

Size

Total Lines 68
Code Lines 33

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 63.6

Importance

Changes 0
Metric Value
cc 15
eloc 33
nc 43
nop 5
dl 0
loc 68
ccs 12
cts 30
cp 0.4
crap 63.6
rs 5.9166
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
14
     * Bom => Byte-Length
15
     *
16
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
17
     *
18
     * @var array
19
     */
20
    private static $BOM = [
21
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
22
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
23
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
24
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
25
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
26
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
27
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
28
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
29
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
30
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
31
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        // HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $ENCODINGS;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ORD;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $EMOJI;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI_VALUES_CACHE;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_KEYS_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $CHR;
234
235
    /**
     * Creates a new instance.
     *
     * The body is intentionally empty: nothing is initialized here.
     */
    public function __construct()
    {
    }
241
242
    /**
     * Fetch the single character located at a given position: $str[1] like functionality.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character; an empty string if $str is empty or $pos is negative
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $nothing_to_extract = $str === '' || $pos < 0;
        if ($nothing_to_extract) {
            return '';
        }

        // UTF-8 is served by the native mbstring call, every other charset goes through self::substr()
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
263
264
    /**
     * Makes sure that the string starts with the UTF-8 BOM.
     *
     * INFO: A string for which self::string_has_bom() does not report false is returned untouched.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
281
282
    /**
     * Changes the case of all keys in an array.
     *
     * Every key is cast to a string and converted via self::strtolower() / self::strtoupper().
     * If two keys end up identical after the conversion, the later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value is treated
     *                         like CASE_LOWER</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array an array with its keys lower- or upper-cased, the values are passed through as they are
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // unknown modes fall back to lower-casing
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: the values are only copied, so a by-reference loop
        // (and the dangling reference it leaves behind) is not needed
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
313
314
    /**
     * Returns the substring between $start and $end, if found, or an empty
     * string. An optional offset may be supplied from which to begin the
     * search for the start string.
     *
     * An empty string is also returned if $end directly follows $start.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // every other charset is normalized first and then handled by the class' own string helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $start_pos = self::strpos($str, $start, $offset, $encoding);
            if ($start_pos === false) {
                return '';
            }

            // first character after the start delimiter
            $content_pos = $start_pos + (int) self::strlen($start, $encoding);
            $end_pos = self::strpos($str, $end, $content_pos, $encoding);
            if ($end_pos === false || $end_pos === $content_pos) {
                return '';
            }

            return (string) self::substr($str, $content_pos, $end_pos - $content_pos, $encoding);
        }

        // plain UTF-8: use the native mbstring functions directly
        $start_pos = \mb_strpos($str, $start, $offset);
        if ($start_pos === false) {
            return '';
        }

        // first character after the start delimiter
        $content_pos = $start_pos + (int) \mb_strlen($start);
        $end_pos = \mb_strpos($str, $end, $content_pos);
        if ($end_pos === false || $end_pos === $content_pos) {
            return '';
        }

        return (string) \mb_substr($str, $content_pos, $end_pos - $content_pos);
    }
377
378
    /**
     * Convert a string of binary digits into the string of bytes it represents.
     *
     * The digits are read as one big-endian number: leading zeros are dropped and the
     * remaining bits are left-padded to whole bytes. Characters other than "0" and "1"
     * are ignored. The conversion works byte by byte, so it neither loses precision on
     * long inputs nor misaligns inputs whose length is not a multiple of 8.
     *
     * @param mixed $bin 1|0
     *
     * @return string the decoded bytes; an empty string for empty input or if no "1" bit is present
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // keep only binary digits and strip the leading zeros (numeric semantics)
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a multiple of 8, so that the bits line up on byte boundaries
        $bits = \str_repeat('0', (8 - \strlen($bits) % 8) % 8) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
398
399
    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        // the three bytes EF BB BF
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
410
411
    /**
     * @alias of UTF8::chr_map()
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback <p>The callback, passed on unchanged.</p>
     * @param string       $str      <p>The string, passed on unchanged.</p>
     *
     * @return string[] whatever UTF8::chr_map() returns
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
425
426
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // other charsets are delegated to the class' own substr()
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
443
444
    /**
     * Returns an array consisting of the characters in the string.
     *
     * This is a thin wrapper around UTF8::str_split() with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
455
456
    /**
     * This method will auto-detect your server environment for UTF-8 support.
     *
     * The findings are stored in self::$SUPPORT. The detection runs only once: later calls
     * return immediately, because the "already checked" marker is set by then.
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     *
     * @return void
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch mbstring (incl. its regex functions) to UTF-8
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();
        self::$SUPPORT['intl__transliterator_list_ids'] = [];

        $transliterator_usable = self::$SUPPORT['intl'] === true
                                 &&
                                 \function_exists('transliterator_list_ids') === true;
        if ($transliterator_usable) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // the polyfill is switched to UTF-8 as well
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }
    }
517
518
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoized per code point + encoding. Code points up to 127 are read from the
     * "chr" data table; bigger ones are built via IntlChar if self::$SUPPORT flags it as
     * available, otherwise the UTF-8 bytes are composed manually.
     *
     * NOTE: self::$SUPPORT is read here without being filled; presumably UTF8::checkForSupport()
     * has been run before - verify against the rest of the file.
     *
     * @param int|string $code_point <p>The code point for which to generate a character. A string that
     *                               consists only of digits is handled like the matching integer.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character; null is never returned explicitly, it can only come from
     *                     the underlying lookups (e.g. IntlChar::chr() for an invalid code point)
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        // IntlChar::chr() reads a string argument as a UTF-8 character, not as a number:
        // convert digit-only strings (e.g. "8364") into integers, so that every branch
        // below produces the same result for them
        if (
            \is_string($code_point) === true
            &&
            \preg_match('/^[0-9]+$/D', $code_point) === 1
        ) {
            $code_point = (int) $code_point;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = (array) self::getData('chr');
        }

        $code_point = (int) $code_point;
        if ($code_point <= 0x7F) {
            // 1 byte: 0xxxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
630
631
    /**
     * Applies callback to all characters of a string.
     *
     * The string is split via UTF8::str_split() and the parts are handed to array_map().
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
646
647
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character, an empty array for an empty string
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $strlen_is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;
        $chars = self::str_split($str);

        if (!$strlen_is_overloaded) {
            return \array_map('\strlen', $chars);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset to count the bytes
        $byte_length = static function (string $data): int {
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };

        return \array_map($byte_length, $chars);
    }
677
678
    /**
     * Get a decimal code representation of a specific character.
     *
     * The first byte decides how many bytes are read; if it matches none of the known
     * lead-byte patterns, its own value is returned.
     *
     * NOTE: the continuation bytes are read without a length check - assumes that $char
     * holds a complete sequence, TODO confirm against callers.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $value = self::ord($char[0]);

        // 0xxxxxxx
        if (($value & 0x80) === 0) {
            return $value;
        }

        $length = 1;
        if (($value & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $value &= ~0xc0;
        } elseif (($value & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $value &= ~0xe0;
        } elseif (($value & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $value &= ~0xf0;
        }

        // 10xxxxxx: append the payload bits of every following byte
        for ($offset = 1; $offset < $length; ++$offset) {
            $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $value;
    }
716
717
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional] <p>Prefix, handed over to UTF8::int_to_hex().</p>
     *
     * @return string The code point encoded as U+xxxx, an empty string for an empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity "&#0;" is looked up as an empty string
        $lookup = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($lookup), $pfix);
    }
737
738
    /**
     * alias for "UTF8::chr_to_decimal()"
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr <p>The character, passed on unchanged.</p>
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
751
752
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending character(s).
     *
     * The chunks come from UTF8::str_split(); $end is placed between the chunks only,
     * nothing is appended after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
765
766
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in a fixed order: diamond question mark, invisible characters,
     * whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 holds well-formed byte sequences, groups 2 + 3 catch stray bytes:
        // replacing every match by "$1" keeps the former and drops the latter
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($utf8_filter, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
833
834
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * The input is cast to a string, repaired via UTF8::fix_simple_utf8() and then passed
     * through UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        // remove all none UTF-8 symbols, and ...
        return self::clean(
            $repaired,
            true,  // ... remove BOM
            true,  // ... normalize whitespace chars
            false, // ... keep MS Word chars as they are
            true,  // ... but keep non-breaking-spaces
            true,  // ... remove diamond question mark (�)
            true   // ... remove invisible characters (e.g. "\0")
        );
    }
868
869
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is split into its characters first; every entry is then mapped via UTF8::ord().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        $code_points = \array_map([self::class, 'ord'], $chars);
        if (\count($code_points) === 0) {
            return [];
        }

        if ($u_style !== true) {
            return $code_points;
        }

        // U+xxxx notation
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
913
914
    /**
     * Trims the string and replaces every run of whitespace characters
     * (POSIX class [[:space:]]) with a single space.
     *
     * Uses mb_ereg_replace() when mbstring support is flagged as available,
     * otherwise falls back to UTF8::regex_replace().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            // fallback without the mbstring extension
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
932
933
    /**
     * Returns count of characters used in a string.
     *
     * The string is split into single characters by UTF8::str_split() and the
     * occurrences are tallied with array_count_values().
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Forwarded unchanged to UTF8::str_split().</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // one array element per character
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($characters);
    }
957
958
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern below is replaced
     * with an empty string; the rest of the input is returned untouched.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string
     *                <p>The input without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // modifiers: case-insensitive, dot matches newline, UTF-8, multiline, ungreedy
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $withoutMediaQueries = \preg_replace($mediaQueryPattern, '', $str);

        // preg_replace() yields null on error, which the cast turns into ''
        return (string) $withoutMediaQueries;
    }
973
974
    /**
     * Checks whether the "ctype" extension is loaded on the server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
984
985
    /**
     * Converts an int-value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * decoded by UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numericEntity = '&#' . $int . ';';

        return self::html_entity_decode($numericEntity, \ENT_QUOTES | \ENT_HTML5);
    }
996
997
    /**
     * Decodes a MIME header field.
     *
     * Uses iconv_mime_decode() when iconv support is flagged as available,
     * otherwise falls back to mb_decode_mimeheader().
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // fallback path: for a non UTF-8 target the input is re-encoded before decoding
        $header = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($header);
    }
1023
1024
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets, the pseudo-encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled
     * for both the target and the source encoding.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the source encoding is always detected via
     *                                       UTF8::str_detect_encoding() and a detected value replaces
     *                                       $fromEncoding; if nothing is detected the string is returned via
     *                                       UTF8::to_utf8().<br> If false, the detection only runs when
     *                                       $fromEncoding is empty.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is 'JSON' and the string can not be json-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        // nothing to do without input or without a target encoding
        if ($toEncoding === '' || $str === '') {
            return $str;
        }

        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // source and target are the same (known) encoding
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // --- pseudo-encodings: JSON ---
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // --- pseudo-encodings: BASE64 ---
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // --- pseudo-encodings: HTML-ENTITIES ---
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the source encoding if requested, or if we still do not know it
        $mustDetect = $autodetectFromEncoding === true || !$fromEncoding;
        $detectedEncoding = $mustDetect ? self::str_detect_encoding($str) : false;

        if ($detectedEncoding !== false) {
            $fromEncoding = $detectedEncoding;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // dedicated converters for the common single-byte <-> UTF-8 cases
        $fromIsLatin = $fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'ISO-8859-1';
        if ($toEncoding === 'UTF-8' && $fromIsLatin) {
            return self::to_utf8($str);
        }

        $fromIsWinOrUtf8 = $fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'UTF-8';
        if ($toEncoding === 'ISO-8859-1' && $fromIsWinOrUtf8) {
            return self::to_iso8859($str);
        }

        $toIsBuiltIn = $toEncoding === 'UTF-8'
                       || $toEncoding === 'ISO-8859-1'
                       || $toEncoding === 'WINDOWS-1252';
        if (!$toIsBuiltIn && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $convertedByMb = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            // a falsy result falls through to the iconv attempt below
            if ($convertedByMb) {
                return $convertedByMb;
            }
        }

        $convertedByIconv = \iconv($fromEncoding, $toEncoding, $str);
        if ($convertedByIconv !== false) {
            return $convertedByIconv;
        }

        // last resort: hand back the (possibly pre-decoded) input
        return $str;
    }
1182
1183
    /**
     * Encodes a string as a MIME header field via iconv_mime_encode().
     *
     * The field name passed to iconv_mime_encode() is always an empty string.
     *
     * @param string $str
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        $inputIsKnown = $fromCharset === 'UTF-8' || $fromCharset === 'CP850';
        if (!$inputIsKnown) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        $outputIsKnown = $toCharset === 'UTF-8' || $toCharset === 'CP850';
        if (!$outputIsKnown) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1223
1224
    /**
     * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
     *
     * The cut positions are looked up at the next / previous space or dot, the cut-off
     * parts are replaced by $replacerForSkippedText and the edges are trimmed from
     * whitespace and punctuation.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the encoding does not change anymore, so decide once which function family is used
        $isUtf8 = $encoding === 'UTF-8';

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // ------------------------------------------------------------------
        // no search-string: simply cut the text at the first space / dot after "$length - 1"
        // ------------------------------------------------------------------
        if ($search === '') {
            if ($length > 0) {
                $stringLength = $isUtf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
            } else {
                $end = 0;
            }

            // INFO: min() over a "false" (not found) result ends up as 0 after the int-cast
            if ($isUtf8) {
                $cutPos = (int) \min(
                    \mb_strpos($str, ' ', $end),
                    \mb_strpos($str, '.', $end)
                );
            } else {
                $cutPos = (int) \min(
                    self::strpos($str, ' ', $end, $encoding),
                    self::strpos($str, '.', $end, $encoding)
                );
            }

            if (!$cutPos) {
                return $str;
            }

            $head = $isUtf8 ? \mb_substr($str, 0, $cutPos) : self::substr($str, 0, $cutPos, $encoding);
            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trimChars) . $replacerForSkippedText;
        }

        // ------------------------------------------------------------------
        // with search-string: locate it and try to center the extract around it
        // ------------------------------------------------------------------
        if ($isUtf8) {
            $wordPos = (int) \mb_stripos($str, $search);
            $halfSide = (int) ($wordPos - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $wordPos = (int) self::stripos($str, $search, 0, $encoding);
            $halfSide = (int) ($wordPos - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // start of the extract: last space / dot within the first "$halfSide" chars
        $startPos = 0;
        if ($halfSide > 0) {
            $halfText = $isUtf8 ? \mb_substr($str, 0, $halfSide) : self::substr($str, 0, $halfSide, $encoding);

            if ($halfText !== false) {
                if ($isUtf8) {
                    $startPos = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $startPos = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($wordPos && $halfSide > 0) {
            // the search-string is far enough from the beginning: cut on both sides
            $offset = $startPos + $length - 1;
            $fullLength = (int) self::strlen($str, $encoding);
            if ($offset > $fullLength) {
                $offset = $fullLength;
            }

            if ($isUtf8) {
                $extractLength = (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                ) - $startPos;
            } else {
                $extractLength = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                ) - $startPos;
            }

            if ($extractLength <= 0) {
                // no usable end position: take everything from the start position on
                if ($isUtf8) {
                    $tail = \mb_substr($str, $startPos, (int) \mb_strlen($str));
                } else {
                    $tail = self::substr($str, $startPos, (int) self::strlen($str, $encoding), $encoding);
                }

                if ($tail === false) {
                    return '';
                }

                return $replacerForSkippedText . \ltrim($tail, $trimChars);
            }

            if ($isUtf8) {
                $middle = \mb_substr($str, $startPos, $extractLength);
            } else {
                $middle = self::substr($str, $startPos, $extractLength, $encoding);
            }

            if ($middle === false) {
                return '';
            }

            return $replacerForSkippedText . \trim($middle, $trimChars) . $replacerForSkippedText;
        }

        // the search-string is missing or near the beginning: cut only at the end
        $offset = $length - 1;
        $fullLength = (int) self::strlen($str, $encoding);
        if ($offset > $fullLength) {
            $offset = $fullLength;
        }

        if ($isUtf8) {
            $endPos = (int) \min(
                \mb_strpos($str, ' ', $offset),
                \mb_strpos($str, '.', $offset)
            );
        } else {
            $endPos = (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        }

        if (!$endPos) {
            return $str;
        }

        $head = $isUtf8 ? \mb_substr($str, 0, $endPos) : self::substr($str, 0, $endPos, $encoding);
        if ($head === false) {
            return '';
        }

        return \rtrim($head, $trimChars) . $replacerForSkippedText;
    }
1411
1412
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. It is passed through
     *                                        filter_var(..., FILTER_SANITIZE_STRING) before usage.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to the native file_get_contents() to trigger
     *                                        an include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If it is null and $timeout is
     *                                        non-zero, a context with that "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string The function returns the read data as string or <b>false</b> on failure.
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // only build a default context if the caller did not provide one
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // the length argument is only passed along if it was really given
        $data = \is_int($maxLength) === true
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // binary data that is neither UTF-16 nor UTF-32 is returned untouched
        $isPlainBinary = self::is_binary($data, true) === true
                         &&
                         self::is_utf16($data, false) === false
                         &&
                         self::is_utf32($data, false) === false;
        if ($isPlainBinary) {
            return $data;
        }

        $utf8Data = self::encode('UTF-8', $data, false, $fromEncoding);

        return self::cleanup($utf8Data);
    }
1509
1510
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * The whole file is read with the native file_get_contents() and then
     * checked via UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1529
1530
    /**
     * Normalizes strings to the given Unicode normalization form (default: NFC).
     *
     * - arrays and objects are processed recursively, element by element
     * - strings get normalized line endings (if they contain "\r") and, if they are not
     *   pure ASCII, are normalized via \Normalizer; if that yields no result the string
     *   is converted via UTF8::encode('UTF-8', ..., true)
     * - every other type is returned unchanged
     *
     * @param mixed  $var
     * @param int    $normalization_form <p>One of the \Normalizer form constants.</p>
     * @param string $leading_combining  <p>Prepended if the result would start with a combining mark.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        if (\is_array($var) || \is_object($var)) {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // break the reference, so later writes can not hit the last element
            unset($item);

            return $var;
        }

        if (!\is_string($var)) {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization at all
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // marker value: non-empty, so the isset() check below passes
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
                                   &&
                                   isset($normalized[0], $leading_combining[0])
                                   &&
                                   \preg_match('/^\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1594
1595
    /**
     * "filter_input()"-wrapper which normalizes the result via UTF8::filter().
     *
     * Gets a specific external variable by name and optionally filters it
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              <b>null</b> (the default) means "no options" and is not forwarded
     *                              to the native function.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // The native function does not accept null as options (TypeError with
        // strict_types on PHP 8), so an omitted AND an explicitly passed null
        // both have to use the 3-argument call.
        if ($options === null) {
            $var = \filter_input($type, $variable_name, $filter);
        } else {
            $var = \filter_input($type, $variable_name, $filter, $options);
        }

        return self::filter($var);
    }
1637
1638
    /**
1639
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1640
     *
1641
     * Gets external variables and optionally filters them
1642
     *
1643
     * @see  http://php.net/manual/en/function.filter-input-array.php
1644
     *
1645
     * @param int   $type       <p>
1646
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1647
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1648
     *                          <b>INPUT_ENV</b>.
1649
     *                          </p>
1650
     * @param mixed $definition [optional] <p>
1651
     *                          An array defining the arguments. A valid key is a string
1652
     *                          containing a variable name and a valid value is either a filter type, or an array
1653
     *                          optionally specifying the filter, flags and options. If the value is an
1654
     *                          array, valid keys are filter which specifies the
1655
     *                          filter type,
1656
     *                          flags which specifies any flags that apply to the
1657
     *                          filter, and options which specifies any options that
1658
     *                          apply to the filter. See the example below for a better understanding.
1659
     *                          </p>
1660
     *                          <p>
1661
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1662
     *                          input array are filtered by this filter.
1663
     *                          </p>
1664
     * @param bool  $add_empty  [optional] <p>
1665
     *                          Add missing keys as <b>NULL</b> to the return value.
1666
     *                          </p>
1667
     *
1668
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1669
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1670
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1671
     *               is not set and <b>NULL</b> if the filter fails.
1672
     */
1673
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native function is called without
        // $definition / $add_empty; otherwise both are forwarded.
        $rawValues = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($rawValues);
    }
1683
1684
    /**
1685
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1686
     *
1687
     * Filters a variable with a specified filter
1688
     *
1689
     * @see  http://php.net/manual/en/function.filter-var.php
1690
     *
1691
     * @param mixed $variable <p>
1692
     *                        Value to filter.
1693
     *                        </p>
1694
     * @param int   $filter   [optional] <p>
1695
     *                        The ID of the filter to apply. The
1696
     *                        manual page lists the available filters.
1697
     *                        </p>
1698
     * @param mixed $options  [optional] <p>
1699
     *                        Associative array of options or bitwise disjunction of flags. If filter
1700
     *                        accepts options, flags can be provided in "flags" field of array. For
1701
     *                        the "callback" filter, callable type should be passed. The
1702
     *                        callback must accept one argument, the value to be filtered, and return
1703
     *                        the value after filtering/sanitizing it.
1704
     *                        </p>
1705
     *                        <p>
1706
     *                        <code>
1707
     *                        // for filters that accept options, use this format
1708
     *                        $options = array(
1709
     *                        'options' => array(
1710
     *                        'default' => 3, // value to return if the filter fails
1711
     *                        // other options here
1712
     *                        'min_range' => 0
1713
     *                        ),
1714
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1715
     *                        );
1716
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1717
     *                        // for filter that only accept flags, you can pass them directly
1718
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1719
     *                        // for filter that only accept flags, you can also pass as an array
1720
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1721
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1722
     *                        // callback validate filter
1723
     *                        function foo($value)
1724
     *                        {
1725
     *                        // Expected format: Surname, GivenNames
1726
     *                        if (strpos($value, ", ") === false) return false;
1727
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1728
     *                        $empty = (empty($surname) || empty($givennames));
1729
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1730
     *                        if ($empty || $notstrings) {
1731
     *                        return false;
1732
     *                        } else {
1733
     *                        return $value;
1734
     *                        }
1735
     *                        }
1736
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1737
     *                        </code>
1738
     *                        </p>
1739
     *
1740
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1741
     */
1742 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // $options is only forwarded when the caller supplied it explicitly.
        if (\func_num_args() >= 3) {
            return self::filter(\filter_var($variable, $filter, $options));
        }

        return self::filter(\filter_var($variable, $filter));
    }
1752
1753
    /**
1754
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1755
     *
1756
     * Gets multiple variables and optionally filters them
1757
     *
1758
     * @see  http://php.net/manual/en/function.filter-var-array.php
1759
     *
1760
     * @param array $data       <p>
1761
     *                          An array with string keys containing the data to filter.
1762
     *                          </p>
1763
     * @param mixed $definition [optional] <p>
1764
     *                          An array defining the arguments. A valid key is a string
1765
     *                          containing a variable name and a valid value is either a
1766
     *                          filter type, or an
1767
     *                          array optionally specifying the filter, flags and options.
1768
     *                          If the value is an array, valid keys are filter
1769
     *                          which specifies the filter type,
1770
     *                          flags which specifies any flags that apply to the
1771
     *                          filter, and options which specifies any options that
1772
     *                          apply to the filter. See the example below for a better understanding.
1773
     *                          </p>
1774
     *                          <p>
1775
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1776
     *                          input array are filtered by this filter.
1777
     *                          </p>
1778
     * @param bool  $add_empty  [optional] <p>
1779
     *                          Add missing keys as <b>NULL</b> to the return value.
1780
     *                          </p>
1781
     *
1782
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1783
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1784
     *               set
1785
     */
1786 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // Build the native argument list: $definition and $add_empty are only
        // forwarded when the caller passed more than just $data.
        $nativeArgs = [$data];
        if (\func_num_args() >= 2) {
            $nativeArgs[] = $definition;
            $nativeArgs[] = $add_empty;
        }

        return self::filter(\filter_var_array(...$nativeArgs));
    }
1796
1797
    /**
1798
     * Checks whether finfo is available on the server.
1799
     *
1800
     * @return bool
1801
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1802
     */
1803
    public static function finfo_loaded(): bool
    {
        // Available exactly when a class named "finfo" can be resolved.
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1807
1808
    /**
1809
     * Returns the first $n characters of the string.
1810
     *
1811
     * @param string $str      <p>The input string.</p>
1812
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1813
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1814
     *
1815
     * @return string
1816
     */
1817 13
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // Nothing to extract from an empty string or for a non-positive count.
        $nothingToTake = ($str === '' || $n <= 0);
        if ($nothingToTake) {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other encoding through self::substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1829
1830
    /**
1831
     * Check if the number of unicode characters is not more than the specified integer.
1832
     *
1833
     * @param string $str      the original string to be checked
1834
     * @param int    $box_size the size in number of chars to be checked against string
1835
     *
1836
     * @return bool true if string is less than or equal to $box_size, false otherwise
1837
     */
1838 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by self::strlen(), compared against the box size.
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1842
1843
    /**
1844
     * Try to fix simple broken UTF-8 strings.
1845
     *
1846
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1847
     *
1848
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1849
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1850
     * See: http://en.wikipedia.org/wiki/Windows-1252
1851
     *
1852
     * @param string $str <p>The input string</p>
1853
     *
1854
     * @return string
1855
     */
1856 42
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Search / replacement lists are derived once and reused by later calls.
        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            // Load the broken => fixed map on first use only.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $fixMap = self::$BROKEN_UTF8_FIX;
            $SEARCH_CACHE = \array_keys($fixMap);
            $REPLACE_CACHE = \array_values($fixMap);
        }

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
1876
1877
    /**
1878
     * Fix a double (or multiple) encoded UTF8 string.
1879
     *
1880
     * @param string|string[] $str you can use a string or an array of strings
1881
     *
1882
     * @return string|string[]
1883
     *                         Will return the fixed input-"array" or
1884
     *                         the fixed input-"string"
1885
     *
1886
     * @psalm-suppress InvalidReturnType
1887
     */
1888 2
    public static function fix_utf8($str)
    {
        // Arrays are processed element by element (recursively), keys are kept.
        if (\is_array($str) === true) {
            foreach ($str as $key => $item) {
                $str[$key] = self::fix_utf8($item);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;

        // Decode and re-encode repeatedly until a pass no longer changes the string.
        $previous = '';
        while ($str !== $previous) {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($str, true);
            $str = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
1919
1920
    /**
1921
     * Get the text direction ('RTL' or 'LTR') of a specific character.
1922
     *
1923
     * @param string $char
1924
     *
1925
     * @return string 'RTL' or 'LTR'
1926
     */
1927 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $bidiClass = \IntlChar::charDirection($char);

            // numeric direction values from the "IntlChar"-Class
            if (\in_array($bidiClass, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($bidiClass, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // any other value: fall through to the code-point table below
        }

        $codePoint = self::chr_to_decimal($char);

        // Everything outside the overall span of the table is treated as LTR.
        if (!($codePoint >= 0x5be && $codePoint <= 0x10b7f)) {
            return 'LTR';
        }

        // RTL code points: a plain int is a single code point (compared strictly),
        // a pair is an inclusive [first, last] range.
        static $RTL_CODE_POINTS = [
            0x5be,
            0x5c0,
            0x5c3,
            0x5c6,
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            0x608,
            0x60b,
            0x60d,
            0x61b,
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            0x710,
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            0x7b1,
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            0x7fa,
            [0x800, 0x815],
            0x81a,
            0x824,
            0x828,
            [0x830, 0x83e],
            [0x840, 0x858],
            0x85e,
            0x200f,
            0xfb1d,
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            0xfb3e,
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            0x10808,
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            0x1083c,
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            0x1093f,
            0x10a00,
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($RTL_CODE_POINTS as $entry) {
            if (\is_int($entry)) {
                if ($codePoint === $entry) {
                    return 'RTL';
                }

                continue;
            }

            if ($codePoint >= $entry[0] && $codePoint <= $entry[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2033
2034
    /**
2035
     * Check for php-support.
2036
     *
2037
     * @param string|null $key
2038
     *
2039
     * @return mixed
2040
     *               Return the full support-"array", if $key === null<br>
2041
     *               return bool-value, if $key is used and available<br>
2042
     *               otherwise return <strong>null</strong>
2043
     */
2044 27
    public static function getSupportInfo(string $key = null)
    {
        // No key: hand back the complete support array.
        // With a key: its entry, or null when the entry is missing (or null).
        return $key === null
            ? self::$SUPPORT
            : (self::$SUPPORT[$key] ?? null);
    }
2056
2057
    /**
2058
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
2059
     *          if you need more supported types, please use e.g. "finfo"
2060
     *
2061
     * @param string $str
2062
     * @param array  $fallback with these keys: 'ext', 'mime', 'type'
2063
     *
2064
     * @return array
2065
     *               with these keys: 'ext', 'mime', 'type'
2066
     */
2067 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // Two-byte file signatures => [extension, mime-type].
        //
        // The raw bytes are compared directly. Building a number by concatenating
        // the decimal byte values is ambiguous: e.g. the bytes 0x07 0xAD give
        // "7" . "173" = 7173, the same number as "GI" (71, 73), and would be
        // reported as a gif.
        static $SIGNATURES = [
            '%P'       => ['pdf', 'application/pdf'],           // 0x25 0x50
            'MZ'       => ['exe', 'application/octet-stream'],  // 0x4D 0x5A
            'MT'       => ['midi', 'audio/x-midi'],             // 0x4D 0x54
            'PK'       => ['zip', 'application/zip'],           // 0x50 0x4B
            'Ra'       => ['rar', 'application/rar'],           // 0x52 0x61
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],                 // 0x47 0x49
            'BM'       => ['bmp', 'image/bmp'],                 // 0x42 0x4D
            "\x89P"    => ['png', 'image/png'],                 // 0x89 0x50
        ];

        // Byte-wise on purpose: signatures are binary data, not characters.
        $signature = \substr($str, 0, 2);

        // Empty or one-byte input can never carry a two-byte signature.
        if ($signature === false || \strlen($signature) !== 2) {
            return $fallback;
        }

        if (!isset($SIGNATURES[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $SIGNATURES[$signature][0],
            'mime' => $SIGNATURES[$signature][1],
            // every signature known here belongs to a binary format
            'type' => 'binary',
        ];
    }
2158
2159
    /**
2160
     * @param int    $length        <p>Length of the random string.</p>
2161
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2162
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2163
     *
2164
     * @return string
2165
     */
2166 1
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // Choose how the pool is measured and how one char is read from it:
        // plain mbstring calls for UTF-8, the class helpers for everything else.
        if ($encoding === 'UTF-8') {
            $poolSize = (int) \mb_strlen($possibleChars);
            $pickChar = static function (int $offset) use ($possibleChars) {
                return \mb_substr($possibleChars, $offset, 1);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $poolSize = (int) self::strlen($possibleChars, $encoding);
            $pickChar = static function (int $offset) use ($possibleChars, $encoding) {
                return self::substr($possibleChars, $offset, 1, $encoding);
            };
        }

        // An empty pool cannot produce any character.
        if ($poolSize === 0) {
            return '';
        }

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $offset = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                // fall back to mt_rand() when random_int() throws
                /** @noinspection RandomApiMigrationInspection */
                $offset = \mt_rand(0, $poolSize - 1);
            }

            $char = $pickChar($offset);
            if ($char === false) {
                // extraction failed: retry without counting this round
                continue;
            }

            $result .= $char;
            ++$picked;
        }

        return $result;
    }
2220
2221
    /**
2222
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2223
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2224
     *
2225
     * @return string
2226
     */
2227 1
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // Seed: random number, session id, remote / server address (when set)
        // and the caller supplied extra entropy, concatenated in this order.
        $seed = \implode(
            '',
            [
                \random_int(0, \mt_getrandmax()),
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $entropyExtra,
            ]
        );

        // uniqid() uses the seed as prefix; "true" requests more entropy.
        $unique = \uniqid($seed, true);

        return $md5 ? \md5($unique . $seed) : $unique;
    }
2243
2244
    /**
2245
     * alias for "UTF8::string_has_bom()"
2246
     *
2247
     * @see        UTF8::string_has_bom()
2248
     *
2249
     * @param string $str
2250
     *
2251
     * @return bool
2252
     *
2253
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
2254
     */
2255 2
    public static function hasBom(string $str): bool
    {
        // Deprecated alias: simply delegates to string_has_bom().
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2259
2260
    /**
2261
     * Returns true if the string contains a lower case char, false otherwise.
2262
     *
2263
     * @param string $str <p>The input string.</p>
2264
     *
2265
     * @return bool whether or not the string contains a lower case character
2266
     */
2267 47
    public static function has_lowercase(string $str): bool
    {
        // anything, followed by one lower case character
        $pattern = '.*[[:lower:]]';

        // Without mbstring support the class-internal matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2276
2277
    /**
2278
     * Returns true if the string contains an upper case char, false otherwise.
2279
     *
2280
     * @param string $str <p>The input string.</p>
2281
     *
2282
     * @return bool whether or not the string contains an upper case character
2283
     */
2284 12
    public static function has_uppercase(string $str): bool
    {
        // anything, followed by one upper case character
        $pattern = '.*[[:upper:]]';

        // Without mbstring support the class-internal matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2293
2294
    /**
2295
     * Converts a hexadecimal-value into an UTF-8 character.
2296
     *
2297
     * @param string $hexdec <p>The hexadecimal value.</p>
2298
     *
2299
     * @return false|string one single UTF-8 character
2300
     */
2301 4
    public static function hex_to_chr(string $hexdec)
    {
        // hex string -> number -> character (via decimal_to_chr())
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2305
2306
    /**
2307
     * Converts hexadecimal U+xxxx code point representation to integer.
2308
     *
2309
     * INFO: opposite to UTF8::int_to_hex()
2310
     *
2311
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
2312
     *
2313
     * @return false|int the code point, or false on failure
2314
     */
2315 2
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Accepted forms: "\u00e4", "U+00e4" or bare "00e4" -> an optional prefix
        // followed by 4 to 6 hexadecimal digits.
        //
        // Only real hex digits are allowed: with [a-zA-Z0-9] an input such as
        // "U+zzzz" matched and intval() silently turned it into 0 (and "12zz"
        // into 0x12) instead of reporting a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2330
2331
    /**
2332
     * alias for "UTF8::html_entity_decode()"
2333
     *
2334
     * @see UTF8::html_entity_decode()
2335
     *
2336
     * @param string $str
2337
     * @param int    $flags
2338
     * @param string $encoding
2339
     *
2340
     * @return string
2341
     */
2342 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Pure alias: all arguments are passed through unchanged.
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2346
2347
    /**
2348
     * Converts a UTF-8 string to a series of HTML numbered entities.
2349
     *
2350
     * INFO: opposite to UTF8::html_decode()
2351
     *
2352
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2353
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2354
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2355
     *
2356
     * @return string HTML numbered entities
2357
     */
2358 13
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // 'UTF-8' and 'CP850' are used as given, everything else is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Start at 0x80 to leave ASCII untouched, at 0x00 to encode it as well.
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A convmap consists of groups of exactly four integers:
            // [start, end, offset, mask]. A fifth, stray element is ignored by
            // PHP 7 but rejected with a ValueError by PHP 8, so it must not be there.
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // Encode char by char and glue the pieces back together.
        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2403
2404
    /**
2405
     * UTF-8 version of html_entity_decode()
2406
     *
2407
     * The reason we are not using html_entity_decode() by itself is because
2408
     * while it is not technically correct to leave out the semicolon
2409
     * at the end of an entity most browsers will still interpret the entity
2410
     * correctly. html_entity_decode() does not convert entities without
2411
     * semicolons, so we are left with our own little solution here. Bummer.
2412
     *
2413
     * Convert all HTML entities to their applicable characters
2414
     *
2415
     * INFO: opposite to UTF8::html_encode()
2416
     *
2417
     * @see http://php.net/manual/en/function.html-entity-decode.php
2418
     *
2419
     * @param string $str      <p>
2420
     *                         The input string.
2421
     *                         </p>
2422
     * @param int    $flags    [optional] <p>
2423
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2424
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2425
     *                         <table>
2426
     *                         Available <i>flags</i> constants
2427
     *                         <tr valign="top">
2428
     *                         <td>Constant Name</td>
2429
     *                         <td>Description</td>
2430
     *                         </tr>
2431
     *                         <tr valign="top">
2432
     *                         <td><b>ENT_COMPAT</b></td>
2433
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2434
     *                         </tr>
2435
     *                         <tr valign="top">
2436
     *                         <td><b>ENT_QUOTES</b></td>
2437
     *                         <td>Will convert both double and single quotes.</td>
2438
     *                         </tr>
2439
     *                         <tr valign="top">
2440
     *                         <td><b>ENT_NOQUOTES</b></td>
2441
     *                         <td>Will leave both double and single quotes unconverted.</td>
2442
     *                         </tr>
2443
     *                         <tr valign="top">
2444
     *                         <td><b>ENT_HTML401</b></td>
2445
     *                         <td>
2446
     *                         Handle code as HTML 4.01.
2447
     *                         </td>
2448
     *                         </tr>
2449
     *                         <tr valign="top">
2450
     *                         <td><b>ENT_XML1</b></td>
2451
     *                         <td>
2452
     *                         Handle code as XML 1.
2453
     *                         </td>
2454
     *                         </tr>
2455
     *                         <tr valign="top">
2456
     *                         <td><b>ENT_XHTML</b></td>
2457
     *                         <td>
2458
     *                         Handle code as XHTML.
2459
     *                         </td>
2460
     *                         </tr>
2461
     *                         <tr valign="top">
2462
     *                         <td><b>ENT_HTML5</b></td>
2463
     *                         <td>
2464
     *                         Handle code as HTML 5.
2465
     *                         </td>
2466
     *                         </tr>
2467
     *                         </table>
2468
     *                         </p>
2469
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2470
     *
2471
     * @return string the decoded string
2472
     */
2473 42
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Fast path: strings shorter than four bytes, or without any "&",
        // are returned untouched.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // When no flags are given, decode both quote styles as HTML5.
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Conversion map for mb_decode_numericentity(): [start, end, offset, mask].
        // The map has to contain a multiple of four integers; PHP 8 throws a
        // ValueError otherwise, so the former fifth element was removed.
        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $numericEntityMap = [0x80, 0xfffff, 0, 0xfffff];

        // Repeat until a full pass changes nothing, so that nested
        // (multiply encoded) entities are resolved as well.
        do {
            $str_compare = $str;

            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity(
                        $str,
                        $numericEntityMap
                    );
                } else {
                    $str = \mb_decode_numericentity(
                        $str,
                        $numericEntityMap,
                        $encoding
                    );
                }
            } else {
                // NOTE(review): this branch runs when mbstring is reported as
                // unavailable but still calls mb_convert_encoding() - presumably
                // a polyfill provides it; confirm.
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // Quote characters are left encoded here; whether they
                        // get decoded is decided by $flags further down.
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" to numeric entities that lack it,
                    // because the native function only decodes terminated ones.
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2560
2561
    /**
2562
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
2563
     *
2564
     * @param string $str
2565
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2566
     *
2567
     * @return string
2568
     */
2569 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Convert both quote styles and replace invalid code unit sequences
        // instead of getting an empty string back.
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2577
2578
    /**
2579
     * Remove empty html-tag.
2580
     *
2581
     * e.g.: <tag></tag>
2582
     *
2583
     * @param string $str
2584
     *
2585
     * @return string
2586
     */
2587 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag, followed by nothing but optional whitespace,
        // followed by a closing tag.
        $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/u";

        $withoutEmptyTags = \preg_replace($emptyTagPattern, '', $str);

        // preg_replace() returns null on failure; the cast turns that into ''.
        return (string) $withoutEmptyTags;
    }
2595
2596
    /**
2597
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2598
     *
2599
     * @see http://php.net/manual/en/function.htmlentities.php
2600
     *
2601
     * @param string $str           <p>
2602
     *                              The input string.
2603
     *                              </p>
2604
     * @param int    $flags         [optional] <p>
2605
     *                              A bitmask of one or more of the following flags, which specify how to handle
2606
     *                              quotes, invalid code unit sequences and the used document type. The default is
2607
     *                              ENT_COMPAT | ENT_HTML401.
2608
     *                              <table>
2609
     *                              Available <i>flags</i> constants
2610
     *                              <tr valign="top">
2611
     *                              <td>Constant Name</td>
2612
     *                              <td>Description</td>
2613
     *                              </tr>
2614
     *                              <tr valign="top">
2615
     *                              <td><b>ENT_COMPAT</b></td>
2616
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2617
     *                              </tr>
2618
     *                              <tr valign="top">
2619
     *                              <td><b>ENT_QUOTES</b></td>
2620
     *                              <td>Will convert both double and single quotes.</td>
2621
     *                              </tr>
2622
     *                              <tr valign="top">
2623
     *                              <td><b>ENT_NOQUOTES</b></td>
2624
     *                              <td>Will leave both double and single quotes unconverted.</td>
2625
     *                              </tr>
2626
     *                              <tr valign="top">
2627
     *                              <td><b>ENT_IGNORE</b></td>
2628
     *                              <td>
2629
     *                              Silently discard invalid code unit sequences instead of returning
2630
     *                              an empty string. Using this flag is discouraged as it
2631
     *                              may have security implications.
2632
     *                              </td>
2633
     *                              </tr>
2634
     *                              <tr valign="top">
2635
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2636
     *                              <td>
2637
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2638
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2639
     *                              string.
2640
     *                              </td>
2641
     *                              </tr>
2642
     *                              <tr valign="top">
2643
     *                              <td><b>ENT_DISALLOWED</b></td>
2644
     *                              <td>
2645
     *                              Replace invalid code points for the given document type with a
2646
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2647
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2648
     *                              instance, to ensure the well-formedness of XML documents with
2649
     *                              embedded external content.
2650
     *                              </td>
2651
     *                              </tr>
2652
     *                              <tr valign="top">
2653
     *                              <td><b>ENT_HTML401</b></td>
2654
     *                              <td>
2655
     *                              Handle code as HTML 4.01.
2656
     *                              </td>
2657
     *                              </tr>
2658
     *                              <tr valign="top">
2659
     *                              <td><b>ENT_XML1</b></td>
2660
     *                              <td>
2661
     *                              Handle code as XML 1.
2662
     *                              </td>
2663
     *                              </tr>
2664
     *                              <tr valign="top">
2665
     *                              <td><b>ENT_XHTML</b></td>
2666
     *                              <td>
2667
     *                              Handle code as XHTML.
2668
     *                              </td>
2669
     *                              </tr>
2670
     *                              <tr valign="top">
2671
     *                              <td><b>ENT_HTML5</b></td>
2672
     *                              <td>
2673
     *                              Handle code as HTML 5.
2674
     *                              </td>
2675
     *                              </tr>
2676
     *                              </table>
2677
     *                              </p>
2678
     * @param string $encoding      [optional] <p>
2679
     *                              Like <b>htmlspecialchars</b>,
2680
     *                              <b>htmlentities</b> takes an optional third argument
2681
     *                              <i>encoding</i> which defines encoding used in
2682
     *                              conversion.
2683
     *                              Although this argument is technically optional, you are highly
2684
     *                              encouraged to specify the correct value for your code.
2685
     *                              </p>
2686
     * @param bool   $double_encode [optional] <p>
2687
     *                              When <i>double_encode</i> is turned off PHP will not
2688
     *                              encode existing html entities. The default is to convert everything.
2689
     *                              </p>
2690
     *
2691
     * @return string
2692
     *                <p>
2693
     *                The encoded string.
2694
     *                <br><br>
2695
     *                If the input <i>string</i> contains an invalid code unit
2696
     *                sequence within the given <i>encoding</i> an empty string
2697
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2698
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2699
     *                </p>
2700
     */
2701 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // 'UTF-8' and 'CP850' are used as given; every other name is run
        // through normalize_encoding() first.
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * The native function leaves backslashes alone, because they are
         * mostly used for escaping when inserting into a database. That is
         * not needed with a proper database layer, so every backslash is
         * replaced by its html entity equivalent here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // Final pass through html_encode() for whatever is still not encoded.
        return self::html_encode($encoded, true, $encoding);
    }
2725
2726
    /**
2727
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2728
     *
2729
     * INFO: Take a look at "UTF8::htmlentities()"
2730
     *
2731
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2732
     *
2733
     * @param string $str           <p>
2734
     *                              The string being converted.
2735
     *                              </p>
2736
     * @param int    $flags         [optional] <p>
2737
     *                              A bitmask of one or more of the following flags, which specify how to handle
2738
     *                              quotes, invalid code unit sequences and the used document type. The default is
2739
     *                              ENT_COMPAT | ENT_HTML401.
2740
     *                              <table>
2741
     *                              Available <i>flags</i> constants
2742
     *                              <tr valign="top">
2743
     *                              <td>Constant Name</td>
2744
     *                              <td>Description</td>
2745
     *                              </tr>
2746
     *                              <tr valign="top">
2747
     *                              <td><b>ENT_COMPAT</b></td>
2748
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2749
     *                              </tr>
2750
     *                              <tr valign="top">
2751
     *                              <td><b>ENT_QUOTES</b></td>
2752
     *                              <td>Will convert both double and single quotes.</td>
2753
     *                              </tr>
2754
     *                              <tr valign="top">
2755
     *                              <td><b>ENT_NOQUOTES</b></td>
2756
     *                              <td>Will leave both double and single quotes unconverted.</td>
2757
     *                              </tr>
2758
     *                              <tr valign="top">
2759
     *                              <td><b>ENT_IGNORE</b></td>
2760
     *                              <td>
2761
     *                              Silently discard invalid code unit sequences instead of returning
2762
     *                              an empty string. Using this flag is discouraged as it
2763
     *                              may have security implications.
2764
     *                              </td>
2765
     *                              </tr>
2766
     *                              <tr valign="top">
2767
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2768
     *                              <td>
2769
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2770
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2771
     *                              string.
2772
     *                              </td>
2773
     *                              </tr>
2774
     *                              <tr valign="top">
2775
     *                              <td><b>ENT_DISALLOWED</b></td>
2776
     *                              <td>
2777
     *                              Replace invalid code points for the given document type with a
2778
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2779
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2780
     *                              instance, to ensure the well-formedness of XML documents with
2781
     *                              embedded external content.
2782
     *                              </td>
2783
     *                              </tr>
2784
     *                              <tr valign="top">
2785
     *                              <td><b>ENT_HTML401</b></td>
2786
     *                              <td>
2787
     *                              Handle code as HTML 4.01.
2788
     *                              </td>
2789
     *                              </tr>
2790
     *                              <tr valign="top">
2791
     *                              <td><b>ENT_XML1</b></td>
2792
     *                              <td>
2793
     *                              Handle code as XML 1.
2794
     *                              </td>
2795
     *                              </tr>
2796
     *                              <tr valign="top">
2797
     *                              <td><b>ENT_XHTML</b></td>
2798
     *                              <td>
2799
     *                              Handle code as XHTML.
2800
     *                              </td>
2801
     *                              </tr>
2802
     *                              <tr valign="top">
2803
     *                              <td><b>ENT_HTML5</b></td>
2804
     *                              <td>
2805
     *                              Handle code as HTML 5.
2806
     *                              </td>
2807
     *                              </tr>
2808
     *                              </table>
2809
     *                              </p>
2810
     * @param string $encoding      [optional] <p>
2811
     *                              Defines encoding used in conversion.
2812
     *                              </p>
2813
     *                              <p>
2814
     *                              For the purposes of this function, the encodings
2815
     *                              ISO-8859-1, ISO-8859-15,
2816
     *                              UTF-8, cp866,
2817
     *                              cp1251, cp1252, and
2818
     *                              KOI8-R are effectively equivalent, provided the
2819
     *                              <i>string</i> itself is valid for the encoding, as
2820
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2821
     *                              the same positions in all of these encodings.
2822
     *                              </p>
2823
     * @param bool   $double_encode [optional] <p>
2824
     *                              When <i>double_encode</i> is turned off PHP will not
2825
     *                              encode existing html entities, the default is to convert everything.
2826
     *                              </p>
2827
     *
2828
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
2835
     */
2836 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // 'UTF-8' and 'CP850' are used as given; every other name is run
        // through normalize_encoding() first.
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The actual conversion is done by the native function.
        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
2848
2849
    /**
2850
     * Checks whether iconv is available on the server.
2851
     *
2852
     * @return bool
2853
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2854
     */
2855
    public static function iconv_loaded(): bool
    {
        // Ask the runtime whether the "iconv" extension is loaded.
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2859
2860
    /**
2861
     * alias for "UTF8::decimal_to_chr()"
2862
     *
2863
     * @see UTF8::decimal_to_chr()
2864
     *
2865
     * @param mixed $int
2866
     *
2867
     * @return string
2868
     */
2869 4
    public static function int_to_chr($int): string
    {
        // Pure alias: the argument is forwarded unchanged to decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2873
2874
    /**
2875
     * Converts Integer to hexadecimal U+xxxx code point representation.
2876
     *
2877
     * INFO: opposite to UTF8::hex_to_int()
2878
     *
2879
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2880
     * @param string $pfix [optional]
2881
     *
2882
     * @return string the prefix followed by the hexadecimal code point, zero-padded to at least four digits
2883
     */
2884 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Left-pad with zeros to a minimum of four hex digits; longer
        // values are kept as they are.
        $paddedHex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $paddedHex;
    }
2892
2893
    /**
2894
     * Checks whether intl-char is available on the server.
2895
     *
2896
     * @return bool
2897
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2898
     */
2899
    public static function intlChar_loaded(): bool
    {
        // Availability is detected by the presence of the "IntlChar" class.
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2903
2904
    /**
2905
     * Checks whether intl is available on the server.
2906
     *
2907
     * @return bool
2908
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2909
     */
2910 5
    public static function intl_loaded(): bool
    {
        // Ask the runtime whether the "intl" extension is loaded.
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2914
2915
    /**
2916
     * alias for "UTF8::is_ascii()"
2917
     *
2918
     * @see        UTF8::is_ascii()
2919
     *
2920
     * @param string $str
2921
     *
2922
     * @return bool
2923
     *
2924
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2925
     */
2926 2
    public static function isAscii(string $str): bool
    {
        // Deprecated camelCase alias: forwards unchanged to is_ascii().
        $result = self::is_ascii($str);

        return $result;
    }
2930
2931
    /**
2932
     * alias for "UTF8::is_base64()"
2933
     *
2934
     * @see        UTF8::is_base64()
2935
     *
2936
     * @param string $str
2937
     *
2938
     * @return bool
2939
     *
2940
     * @deprecated <p>use "UTF8::is_base64()"</p>
2941
     */
2942 2
    public static function isBase64($str): bool
    {
        // Deprecated camelCase alias: forwards unchanged to is_base64().
        $result = self::is_base64($str);

        return $result;
    }
2946
2947
    /**
2948
     * alias for "UTF8::is_binary()"
2949
     *
2950
     * @see        UTF8::is_binary()
2951
     *
2952
     * @param mixed $str
2953
     * @param bool  $strict
2954
     *
2955
     * @return bool
2956
     *
2957
     * @deprecated <p>use "UTF8::is_binary()"</p>
2958
     */
2959 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated camelCase alias: both arguments are forwarded
        // unchanged to is_binary().
        $result = self::is_binary($str, $strict);

        return $result;
    }
2963
2964
    /**
2965
     * alias for "UTF8::is_bom()"
2966
     *
2967
     * @see        UTF8::is_bom()
2968
     *
2969
     * @param string $utf8_chr
2970
     *
2971
     * @return bool
2972
     *
2973
     * @deprecated <p>use "UTF8::is_bom()"</p>
2974
     */
2975 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated camelCase alias: forwards unchanged to is_bom().
        $result = self::is_bom($utf8_chr);

        return $result;
    }
2979
2980
    /**
2981
     * alias for "UTF8::is_html()"
2982
     *
2983
     * @see        UTF8::is_html()
2984
     *
2985
     * @param string $str
2986
     *
2987
     * @return bool
2988
     *
2989
     * @deprecated <p>use "UTF8::is_html()"</p>
2990
     */
2991 2
    public static function isHtml(string $str): bool
    {
        // Deprecated camelCase alias: forwards unchanged to is_html().
        $result = self::is_html($str);

        return $result;
    }
2995
2996
    /**
2997
     * alias for "UTF8::is_json()"
2998
     *
2999
     * @see        UTF8::is_json()
3000
     *
3001
     * @param string $str
3002
     *
3003
     * @return bool
3004
     *
3005
     * @deprecated <p>use "UTF8::is_json()"</p>
3006
     */
3007
    public static function isJson(string $str): bool
    {
        // Deprecated camelCase alias: forwards unchanged to is_json().
        $result = self::is_json($str);

        return $result;
    }
3011
3012
    /**
3013
     * alias for "UTF8::is_utf16()"
3014
     *
3015
     * @see        UTF8::is_utf16()
3016
     *
3017
     * @param mixed $str
3018
     *
3019
     * @return false|int
3020
     *                   <strong>false</strong> if it's not UTF-16,<br>
3021
     *                   <strong>1</strong> for UTF-16LE,<br>
3022
     *                   <strong>2</strong> for UTF-16BE
3023
     *
3024
     * @deprecated <p>use "UTF8::is_utf16()"</p>
3025
     */
3026 2
    public static function isUtf16($str)
    {
        // Deprecated camelCase alias: forwards unchanged to is_utf16().
        $result = self::is_utf16($str);

        return $result;
    }
3030
3031
    /**
3032
     * alias for "UTF8::is_utf32()"
3033
     *
3034
     * @see        UTF8::is_utf32()
3035
     *
3036
     * @param mixed $str
3037
     *
3038
     * @return false|int
3039
     *                   <strong>false</strong> if it's not UTF-32,
3040
     *                   <strong>1</strong> for UTF-32LE,
3041
     *                   <strong>2</strong> for UTF-32BE
3042
     *
3043
     * @deprecated <p>use "UTF8::is_utf32()"</p>
3044
     */
3045 2
    public static function isUtf32($str)
    {
        // Deprecated camelCase alias: forwards unchanged to is_utf32().
        $result = self::is_utf32($str);

        return $result;
    }
3049
3050
    /**
3051
     * alias for "UTF8::is_utf8()"
3052
     *
3053
     * @see        UTF8::is_utf8()
3054
     *
3055
     * @param string $str
3056
     * @param bool   $strict
3057
     *
3058
     * @return bool
3059
     *
3060
     * @deprecated <p>use "UTF8::is_utf8()"</p>
3061
     */
3062 17
    public static function isUtf8($str, $strict = false): bool
    {
        // Deprecated camelCase alias: both arguments are forwarded
        // unchanged to is_utf8().
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3066
3067
    /**
3068
     * Returns true if the string contains only alphabetic chars, false otherwise.
3069
     *
3070
     * @param string $str
3071
     *
3072
     * @return bool
3073
     *              Whether or not $str contains only alphabetic chars
3074
     */
3075 10
    public static function is_alpha(string $str): bool
    {
        // The empty string matches as well, because of the "*" quantifier.
        $alphaOnlyPattern = '^[[:alpha:]]*$';

        // Without mbstring, use the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $alphaOnlyPattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($alphaOnlyPattern, $str);
    }
3084
3085
    /**
3086
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3087
     *
3088
     * @param string $str
3089
     *
3090
     * @return bool
3091
     *              Whether or not $str contains only alphanumeric chars
3092
     */
3093 13
    public static function is_alphanumeric(string $str): bool
    {
        // The empty string matches as well, because of the "*" quantifier.
        $alnumOnlyPattern = '^[[:alnum:]]*$';

        // Without mbstring, use the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $alnumOnlyPattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($alnumOnlyPattern, $str);
    }
3102
3103
    /**
3104
     * Checks if a string is 7 bit ASCII.
3105
     *
3106
     * @param string $str <p>The string to check.</p>
3107
     *
3108
     * @return bool
3109
     *              <strong>true</strong> if it is ASCII<br>
3110
     *              <strong>false</strong> otherwise
3111
     */
3112 137
    public static function is_ascii(string $str): bool
    {
        // Accepted bytes: 0x09, 0x0A, 0x0D, 0x10, 0x13 and the printable
        // range 0x20-0x7E; any other byte makes the string non-ASCII.
        // NOTE(review): \x10 and \x13 look like decimal 10/13 (LF/CR)
        // written as hex - confirm whether DLE/DC3 are really meant.
        return $str === ''
            || !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
3120
3121
    /**
3122
     * Returns true if the string is base64 encoded, false otherwise.
3123
     *
3124
     * @param mixed|string $str                <p>The input string.</p>
3125
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
3126
     *
3127
     * @return bool whether or not $str is base64 encoded
3128
     */
3129 16
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        // An empty string only counts as base64 when explicitly allowed.
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: re-encoding must reproduce the input exactly, which
        // also rules out e.g. missing padding.
        return \base64_encode($decoded) === $str;
    }
3146
3147
    /**
3148
     * Check if the input is binary... (this looks like a hack).
3149
     *
3150
     * @param mixed $input
3151
     * @param bool  $strict
3152
     *
3153
     * @return bool
3154
     */
3155 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // A string made up of only "0" and "1" is treated as binary.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        // Ask the file-type detection of this class.
        $fileType = self::get_file_type($data);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // More than 25% NUL bytes -> binary.
        $nullByteRatio = \substr_count($data, "\x0") / \strlen($data);
        if ($nullByteRatio > 0.25) {
            return true;
        }

        // The remaining check is only done in strict mode.
        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $detectedEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $detectedEncoding === 'binary';
    }
3191
3192
    /**
3193
     * Check if the file is binary.
3194
     *
3195
     * @param string $file
3196
     *
3197
     * @return bool
3198
     */
3199 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened: reported as "not binary".
            return false;
        }

        // Only the first 512 bytes are inspected.
        $firstBytes = \fread($handle, 512);
        \fclose($handle);

        if ($firstBytes === '') {
            return false;
        }

        // The sample is checked in strict mode.
        return self::is_binary($firstBytes, true);
    }
3216
3217
    /**
     * Check whether the string is empty or consists of whitespace chars only.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str matches "^[[:space:]]*$", <strong>false</strong> otherwise
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the internal pattern matcher instead
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3234
3235
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * The comparison is strict: $str must be identical to one of the keys
     * of the internal BOM table, a string that merely starts with a BOM does not match.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // The BOM strings are the keys of the table. Only read them: iterating the
        // shared static array by reference is not needed for a pure lookup.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3256
3257
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it loosely equals FALSE,
     * e.g. "", "0", 0, null, false or an empty array.
     *
     * @param mixed $str <p>The value to check.</p>
     *
     * @return bool whether or not $str is empty
     */
    public static function is_empty($str): bool
    {
        // for an existing variable "empty($x)" is the same as "a boolean cast that yields false"
        return (bool) $str === false;
    }
3271
3272
    /**
     * Check whether the string is empty or consists of hexadecimal chars only.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str matches "^[[:xdigit:]]*$", <strong>false</strong> otherwise
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the internal pattern matcher instead
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3289
3290
    /**
     * Check if the string contains at least one html-tag, e.g. "<lall>".
     *
     * Opening, closing and self-closing tags (with or without attributes) are detected.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if a tag was found, <strong>false</strong> otherwise
     *              (also for an empty string)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/u";

        // "1" means a match, "0" (no match) and "false" (pcre error) both count as "no html"
        return \preg_match($tagPattern, $str) === 1;
    }
3310
3311
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via "UTF8::json_decode()" and is accepted when the decoding
     * did not report an error. By default only results of the type array or object are accepted.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if ext-json is flagged as missing
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // "null" is also the failure value of the decoder, so only accept it for the literal "null"
        $isLiteralNull = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$isLiteralNull) {
            return false;
        }

        $isContainer = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isContainer) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3347
3348
    /**
     * Check whether the string is empty or consists of lower case chars only.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str matches "^[[:lower:]]*$", <strong>false</strong> otherwise
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the internal pattern matcher instead
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3362
3363
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done by really unserializing the string. Objects are never
     * instantiated while doing so ("allowed_classes" is disabled), so no magic
     * methods of serialized classes can be triggered by untrusted input.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized "false" and can't be told apart from the
        // failure value of "unserialize()", so it is checked explicitly.
        if ($str === 'b:0;') {
            return true;
        }

        // The silence operator is needed, because "unserialize()" emits a notice for invalid input.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3382
3383
    /**
     * Check whether the string is empty or consists of upper case chars only.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str matches "^[[:upper:]]*$", <strong>false</strong> otherwise
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the internal pattern matcher instead
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3401
3402
    /**
     * Check if the string is UTF-16.
     *
     * The string is converted from UTF-16LE and from UTF-16BE to UTF-8. For each byte order
     * whose conversion survives a round-trip, the chars of the converted string are compared
     * with the chars of the input. The byte order with more hits wins, a draw means "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>Return false right away if
     *                                     "UTF8::is_binary()" (strict mode) does not classify the
     *                                     input as binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $hits = [];
        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $hits[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only trust the conversion if it is stable over a round-trip
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // lazy init, shared by both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys (presumably the chars, verify against "UTF8::count_chars()") are
            // needed, so iterate over a plain copy of them instead of taking references
            // into the temporary result of a function call.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        if ($hits['UTF-16BE'] === $hits['UTF-16LE']) {
            return false;
        }

        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
3479
3480
    /**
     * Check if the string is UTF-32.
     *
     * The string is converted from UTF-32LE and from UTF-32BE to UTF-8. For each byte order
     * whose conversion survives a round-trip, the chars of the converted string are compared
     * with the chars of the input. The byte order with more hits wins, a draw means "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>Return false right away if
     *                                     "UTF8::is_binary()" (strict mode) does not classify the
     *                                     input as binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $hits = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $hits[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only trust the conversion if it is stable over a round-trip
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // lazy init, shared by both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys (presumably the chars, verify against "UTF8::count_chars()") are
            // needed, so iterate over a plain copy of them instead of taking references
            // into the temporary result of a function call.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        if ($hits['UTF-32BE'] === $hits['UTF-32LE']) {
            return false;
        }

        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
3557
3558
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * Every value of the emoji table is replaced by its key.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           when <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the key-set that is used as replacement for the emoji chars
        $replacements = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacements,
            $str
        );
    }
3586
3587
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * Every key of the emoji table is replaced by its value.
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the key-set that has to be searched in the encoded string
        $needles = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $needles,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
3615
3616
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * An array is valid if every element is valid. Every other input is cast to a string
     * before it is checked, an empty string is valid.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // read-only check: no need to iterate by reference
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Cast once, so that the byte-access below always works on a string
        // (before, only the length was taken from the string representation).
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): the regex shortcut is taken when "pcre_utf8_support()" is NOT true,
        // which looks inverted compared with the comment below - confirm the intent.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str, $ar) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        return true;
    }
3771
3772
    /**
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" first and the result is
     * handed over to the native "json_decode()".
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>.
     *                        </p>
     *
     * @throws \RuntimeException if ext-json is flagged as missing
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filteredJson, $assoc, $depth, $options);
    }
3822
3823
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" first and the result is
     * handed over to the native "json_encode()".
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options, e.g. <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if ext-json is flagged as missing
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filteredValue, $options, $depth);
    }
3873
    /**
     * Checks whether JSON is available on the server.
     *
     * The check looks for the native function "json_decode".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $decoderExists = \function_exists('json_decode');

        return $decoderExists;
    }
3883
3884
    /**
     * Makes string's first char lowercase.
     *
     * For the encoding "UTF-8" the native "mb_*" functions are used directly, as long as
     * no language and no "keep string length" handling is requested. In every other case
     * the first char is lowered via "UTF8::strtolower()".
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $firstChar = (string) \mb_substr($str, 0, 1);

        if ($lang === null && $tryToKeepStringLength === false) {
            // no special handling requested: the native function is enough
            $head = \mb_strtolower($firstChar);
        } else {
            $head = self::strtolower(
                $firstChar,
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $head . $tail;
    }
3940
3941
    /**
     * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowered;
    }
3963
3964
    /**
     * Lowercase the first character of every word in the string.
     *
     * The string is split via "UTF8::str_to_words()", each non-empty part that is
     * not listed in $exceptions is passed to "UTF8::lcfirst()", and the parts are
     * glued back together without a separator.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a loose check would also swallow the input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // Empty parts (and "0") have nothing to lowercase.
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }
        // Break the reference so later writes to $word cannot touch the array.
        unset($word);

        return \implode('', $words);
    }
4010
4011
    /**
     * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowered;
    }
4033
4034
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string means
     *                           "strip whitespace"</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness test, so that the
        // character list "0" is honoured and not mistaken for "no list".
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4062
4063
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before the code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if (\count($codepoints) < 1) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
4085
4086
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte lengths of the given chars, 0 if the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
4103
4104
    /**
     * Reports whether the "mbstring" extension is loaded on this server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4114
4115
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before the code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point than others, returns null on failure or empty input
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if (\count($codepoints) < 1) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
4137
4138
    /**
     * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4154
4155
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: fixed shortcuts (UTF-8, CP850, HTML-ENTITIES, fallback),
     * the per-request cache, the list of known encodings, and finally an alias
     * table that is matched against the upper-cased name with every character
     * other than letters, digits and whitespace removed.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $NORMALIZED_CACHE = [];

        $encoding = (string) $encoding;

        // '' and '0' are both falsy and end up here.
        if (!$encoding) {
            return $fallback;
        }

        // Strict comparisons on purpose: numeric look-alikes such as "01" must not match "1".
        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($NORMALIZED_CACHE[$encoding])) {
            return $NORMALIZED_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // Already one of the known encoding names: keep it as it is.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $NORMALIZED_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $cacheKey = $encoding;
        $encoding = \strtoupper($encoding);
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/u', '', $encoding);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown names fall through as their upper-cased form.
        $encoding = $aliases[$aliasKey] ?? $encoding;

        $NORMALIZED_CACHE[$cacheKey] = $encoding;

        return $encoding;
    }
4298
4299
    /**
     * Convert Windows ("\r\n") and old Mac ("\r") line endings to unix-like "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // strtr() tries the longest key first, so "\r\n" collapses into a single "\n".
        return \strtr($str, ["\r\n" => "\n", "\r" => "\n"]);
    }
4310
4311
    /**
     * Replace typographic quotes, dashes and the ellipsis (as typically produced
     * by MS Word) with their plain ASCII counterparts.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \strtr($str, $replacements);
    }
4362
4363
    /**
     * Normalize the whitespace.
     *
     * Every entry of the whitespace table is replaced by a plain space; unless
     * told otherwise, the entries of the bidi control table are removed first.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        // One cached search list per variant: 0 = replace NBSP too, 1 = keep NBSP.
        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4404
4405
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Results are cached per character + encoding. Lookup order: the "ord" data
     * table, then "IntlChar" (if supported), then manual decoding of the first
     * four bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];

            return $CHAR_CACHE[$cacheKey];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                $CHAR_CACHE[$cacheKey] = $intlCode;

                return $CHAR_CACHE[$cacheKey];
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of (at most) the first four byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte (or nothing to decode)
            $codePoint = $lead;
        }

        $CHAR_CACHE[$cacheKey] = (int) $codePoint;

        return $CHAR_CACHE[$cacheKey];
    }
4483
4484
    /**
     * Parses the string into an array (into the the second parameter).
     *
     * WARNING: Instead of "parse_str()" this method do not (re-)placing variables in the current scope,
     *          if the second parameter is not set!
     *
     * Uses "mb_parse_str()" when mbstring is supported, plain "parse_str()" otherwise.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4516
4517
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchResult = @\preg_match('//u', '');

        return (bool) $matchResult;
    }
4529
4530
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each endpoint is interpreted in this order: decimal digits => code point,
     * hexadecimal digits (strings only) => code point, anything else => the code
     * point of the given character via "UTF8::ord()".
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is not available
     *
     * @return string[] empty if an endpoint is falsy or resolves to code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Shared endpoint resolution. "ctype_xdigit()" only ever sees strings here,
        // because passing non-strings to ctype functions is deprecated since PHP 8.1.
        $toCodePoint = static function ($value): int {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit((string) $value)) {
                return (int) $value;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\is_string($value) && \ctype_xdigit($value)) {
                return (int) self::hex_to_int($value);
            }

            return self::ord($value);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            static function (int $i): string {
                return (string) self::chr($i);
            },
            \range($start, $end)
        );
    }
4581
4582
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: decode as often as possible (until the string no longer changes),
     *                             false: decode exactly once.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Nothing that could be an entity, a percent escape, a "+" or a "\u"
        // sequence: only the simple UTF-8 fix-up is needed.
        if (
            \strpos($str, '&') === false
            &&
            \strpos($str, '%') === false
            &&
            \strpos($str, '+') === false
            &&
            \strpos($str, '\u') === false
        ) {
            return self::fix_simple_utf8($str);
        }

        // Rewrite "%uXXXX" escapes as numeric html entities, so that the
        // entity decoder below can handle them.
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // One full decoding pass: entities -> percent escapes -> UTF-8 fix-up.
        $decodeOnce = static function (string $input) use ($flags): string {
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            return self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($input),
                        $flags
                    )
                )
            );
        };

        if ($multi_decode === true) {
            do {
                $str_compare = $str;
                $str = $decodeOnce($str);
            } while ($str_compare !== $str);
        } else {
            // Without this branch the input would be returned still encoded.
            $str = $decodeOnce($str);
        }

        return $str;
    }
4646
4647
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in the delimiter and always run with the "u" modifier;
     * the option string "msr" is reduced to "ms".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter the the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4680
4681
    /**
     * Alias of "UTF8::remove_bom()": the argument is forwarded unchanged.
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4696
4697
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked in turn against the (possibly
     * already shortened) string, so more than one leading BOM can be removed.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingLength = \strlen($str);

        foreach (self::$BOM as $bomString => $bomByteLength) {
            // Only a BOM at the very beginning counts.
            if (\strpos($str, $bomString, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $bomByteLength, $remainingLength);
            if ($stripped === false) {
                return '';
            }

            $remainingLength -= (int) $bomByteLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4726
4727
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "  " for " ") is collapsed into
     * a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // Anything that is neither a string nor an array leaves the input untouched.
        if (\is_array($what) === false) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($what as $item) {
            // The replacement is the captured group, not the raw $item: a raw item
            // such as '$0' or '\1' would be read as a backreference by preg_replace().
            // Group 1 holds the last repetition, which is the literal item.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
4750
4751
    /**
     * Strip HTML and PHP tags from the string (thin wrapper around "strip_tags()").
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "<b><i>".
     *                              Default: '' (every tag is stripped)</p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4765
4766
    /**
     * Remove all breaks [<br> | \r\n | \r | \n] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced only once, and only
     * real "<br>" tags (in any case, with or without attributes / a closing slash)
     * are matched - other tags which merely start with "br" are left alone.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" must come first in the alternation, otherwise "\r" and "\n" would match separately
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
4778
4779
    /**
     * Remove invisible (control) characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * The replacement is repeated until nothing matches anymore, so sequences which
     * only appear after a previous removal (e.g. "%0%000") are removed as well.
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url-encoded form of the characters. Default: true</p>
     * @param string $replacement [optional] <p>The string used instead of each match. Default: ''</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        $patterns = $url_encoded
            ? [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
            ]
            : [];

        $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replacedCount);
        } while ($replacedCount !== 0);

        return $str;
    }
4812
4813
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix is compared byte-wise; only the length of the removed part is
     * calculated in characters of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a plain truthiness check would treat the prefix "0" as "no prefix".
        if ($substring === '' || \strncmp($str, $substring, \strlen($substring)) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4844
4845
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix is compared byte-wise; only the length of the kept part is
     * calculated in characters of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a plain truthiness check would treat the suffix "0" as "no suffix".
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4877
4878
    /**
     * Replace every occurrence of $search in $str with $replacement.
     *
     * The case-sensitive variant uses "str_replace()", the case-insensitive
     * variant is delegated to "self::str_ireplace()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4900
4901
    /**
     * Replace every occurrence of each $search element in $str with $replacement.
     *
     * The case-sensitive variant uses "str_replace()", the case-insensitive
     * variant is delegated to "self::str_ireplace()".
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string (or the list of strings) to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4923
4924
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * Invalid byte sequences are first converted into U+FFFD via "mb_convert_encoding()";
     * afterwards every U+FFFD (the converted ones and the ones which were already part
     * of the input) is replaced by $replacementChar.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    [optional] <p>The replacement character. Default: '' (remove)</p>
     * @param bool   $processInvalidUtf8 [optional] <p>Convert invalid UTF-8 chars. Default: true</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            $previousSubstitute = \mb_substitute_character();

            try {
                // "mb_substitute_character()" only accepts a code point or "none" / "long" / "entity",
                // so $replacementChar must not be passed to it directly: substitute with U+FFFD here
                // and let the final "str_replace()" insert the real replacement.
                \mb_substitute_character(0xFFFD);

                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the previous (global) setting, even if the conversion throws
                \mb_substitute_character($previousSubstitute);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
        return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
    }
4970
4971
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars [optional] <p>Characters to be stripped. Whitespace is stripped
     *                           if this is null or an empty string.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly: a plain truthiness check would ignore the valid char list "0".
        if ($chars !== null && $chars !== '') {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4999
5000
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes one "key - value" line per entry of the support table, wrapped in "<pre>".
     *
     * @return void
     */
    public static function showSupport()
    {
        echo '<pre>';

        foreach (self::$SUPPORT as $feature => $state) {
            echo $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '</pre>';
    }
5014
5015
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars [optional] <p>Set to <strong>true</strong> to keep ASCII chars. Default: false</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity, the unchanged char (kept ASCII) or '' for empty input
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // the ASCII check is only executed if the caller asked to keep ASCII chars
        $keepUnchanged = $keepAsciiChars === true && self::is_ascii($char) === true;

        return $keepUnchanged
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5040
5041
    /**
     * Replace every run of $tabLength consecutive spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces which form one tab. Default: 4.
     *                          For values lower than 1 the string is returned unchanged.</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // there is nothing to search for with a non-positive length
        // (and "str_repeat()" does not accept a negative number of repetitions)
        if ($tabLength <= 0) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
    }
5059
5060
    /**
     * Convert a string to an array of Unicode characters.
     *
     * An array as input is processed element by element (recursively) and returned
     * with the same keys. Depending on the available support, the characters are
     * extracted via "mb_substr()", via a PCRE "/u" pattern or via a byte-wise
     * UTF-8 fallback.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as &$v) {
                $v = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }
            // break the reference, so that $v does not alias the last element any longer
            unset($v);

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short string: fetch the characters one by one
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer string: split with a single regex call
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {

            // fallback: decode the UTF-8 sequences byte by byte,
            // bytes which do not form a valid sequence are skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1-byte (ASCII) character
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2-byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            // The callback takes its argument by value: "array_map()" passes values,
            // a by-reference parameter triggers a warning on PHP 8.
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
5199
5200
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native "mb_strtoupper()" is only used if no special-case handling was requested
        $nativeUpper = $lang === null && $tryToKeepStringLength === false;

        /**
         * Upper-case a matched chunk, either natively or via "self::strtoupper()".
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @return string
         */
        $toUpper = static function (string $chunk, bool $clean) use ($nativeUpper, $encoding, $lang, $tryToKeepStringLength): string {
            if ($nativeUpper !== true) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chunk);
            }

            return \mb_strtoupper($chunk, $encoding);
        };

        // 1.) drop the separators and upper-case the character which follows them (if any)
        $str = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return isset($match[1]) ? $toUpper($match[1], false) : '';
            },
            $str
        );

        // 2.) upper-case the character which follows a run of digits
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $str
        );
    }
5285
5286
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the capitalize-helper is applied
     * twice: once with " " and once with "-" as word delimiter.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $spaceSeparated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($spaceSeparated, '-');
    }
5304
5305
    /**
     * Check whether $haystack contains $needle.
     *
     * The comparison is case-sensitive ("strpos()") by default and can be made
     * case-insensitive ("mb_stripos()") by setting $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5327
5328
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty haystack, an empty list of needles or an empty needle always
     * results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against '' (not truthiness), so that the needle "0" is searched like any other
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail, but a hit must NOT end the loop:
            // the remaining needles still have to be checked
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5363
5364
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped; an empty haystack or an empty list of needles
     * always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against '' (not truthiness), so that the needle "0" is searched like any other
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // the first hit is enough, but a miss must NOT end the loop:
            // one of the remaining needles may still match
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5400
5401
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "self::str_delimit()" with "-" as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5415
5416
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * The two regex steps use "mb_ereg_replace()" if "mbstring" is supported,
     * otherwise "preg_replace()" with the "/u" modifier.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $hasMbString = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) mark every uppercase character (which is not at a word boundary) with a leading dash
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\B(\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lowercase: natively if no special-case handling was requested
        if ($lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // 3.) replace every run of dashes, underscores and whitespace with the delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[-_\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5467
5468
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary (UTF-16 / UTF-32), ASCII, UTF-8,
     * "mb_detect_encoding()" and finally an "iconv()" round-trip per known encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            $utf16Type = self::is_utf16($str, false);
            if ($utf16Type === 1 || $utf16Type === 2) {
                return $utf16Type === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            $utf32Type = self::is_utf32($str, false);
            if ($utf32Type === 1 || $utf32Type === 2) {
                return $utf32Type === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $detectOrder = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $detectOrder, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);

            // the string survived the conversion unchanged => it is valid in this encoding
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5589
5590
    /**
     * Tell whether the trailing bytes of $haystack are equal to $needle.
     *
     * - case-sensitive, byte-wise comparison
     *
     * @param string $haystack <p>The string to inspect.</p>
     * @param string $needle   <p>The suffix to look for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needleLength = \strlen($needle);
        $tail = \substr($haystack, -$needleLength);

        return $tail === $needle;
    }
5602
5603
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive, byte-wise comparison
     * - an empty $substrings list always yields false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: the list is only read, so a by-reference loop
        // variable is unnecessary and would leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
5627
5628
    /**
     * Guarantee that the string starts with $substring; the substring is
     * prepended when the string does not already begin with it.
     *
     * An empty $substring is always "prepended", which leaves $str unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix that must be present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $prefixLength = \strlen($substring);

        $alreadyPrefixed = $prefixLength > 0
                           && \strncmp($str, $substring, $prefixLength) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5649
5650
    /**
     * Guarantee that the string finishes with $substring; the substring is
     * appended when the string does not already end with it.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix that must be present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Nothing to do when a non-empty string already carries the non-empty suffix.
        if (
            $str !== ''
            &&
            $substring !== ''
            &&
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return $str;
        }

        return $str . $substring;
    }
5672
5673
    /**
     * Make an identifier-like string more readable: every '_id' is removed,
     * each remaining underscore becomes a space, surrounding whitespace is
     * trimmed and the result is handed to self::ucfirst().
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // '_id' is listed first so it is stripped before single underscores are replaced.
        $readable = \str_replace(['_id', '_'], ['', ' '], $str);

        return self::ucfirst(\trim($readable));
    }
5697
5698
    /**
     * Case-insensitive check whether $haystack ends with $needle.
     *
     * An empty $haystack or an empty $needle always yields false.
     *
     * @param string $haystack <p>The string to inspect.</p>
     * @param string $needle   <p>The suffix to look for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Compare only the trailing part that has the same byte length as the needle.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5714
5715
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive (each candidate is checked via self::str_iends_with())
     * - an empty $substrings list always yields false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: the list is only read, so a by-reference loop
        // variable is unnecessary and would leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5739
5740
    /**
     * Case-insensitive lookup of the first position of $needle inside $str.
     *
     * Thin wrapper that forwards all arguments to self::stripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The <strong>index</strong> of the first match, or <strong>false</strong> if there is none
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5766
5767
    /**
     * Case-insensitive lookup of the last position of $needle inside $str.
     *
     * Thin wrapper that forwards all arguments to self::strripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The <strong>index</strong> of the last match, or <strong>false</strong> if there is none
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5794
5795
    /**
     * Case-sensitive lookup of the first position of $needle inside $str.
     *
     * Thin wrapper that forwards all arguments to self::strpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The <strong>index</strong> of the first match, or <strong>false</strong> if there is none
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5821
5822
    /**
     * Case-sensitive lookup of the last position of $needle inside $str.
     *
     * Thin wrapper that forwards all arguments to self::strrpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The <strong>index</strong> of the last match, or <strong>false</strong> if there is none
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5849
5850
    /**
     * Put $substring into the string at character position $index.
     *
     * When $index is greater than the character length of $str, the string
     * is returned untouched.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // fast path: call the "mb_" functions directly
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $before = (string) \mb_substr($str, 0, $index);
            $after = (string) \mb_substr($str, $index, $length);

            return $before . $substring . $after;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $before = (string) self::substr($str, 0, $index, $encoding);
        $after = (string) self::substr($str, $index, $length, $encoding);

        return $before . $substring . $after;
    }
5889
5890
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search term is turned into a quoted, case-insensitive unicode
     * regular expression and applied with <function>preg_replace</function>.
     * An empty search term is mapped to a pattern that can never match.
     * Replacements are inserted literally: "$" and "\" are escaped, so values
     * like "$0" or "\1" are not interpreted as back-references.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement string, or an array of replacements
     *                       paired with the search terms by order.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/' // never matches: an empty needle must not replace anything
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // preg_replace() treats "$n" / "\n" in the replacement as back-references,
        // str_ireplace() semantics require the replacement to be taken literally.
        $escapeReplacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replace = \is_array($replace)
            ? \array_map($escapeReplacement, $replace)
            : $escapeReplacement($replace);

        return \preg_replace($patterns, $replace, $subject, -1, $count);
    }
5934
5935
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - case-insensitive, also for non-ASCII UTF-8 characters
     * - an empty $search appends $replacement to $str
     * - if $str does not start with $search, $str is returned unchanged
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        // Use the multibyte functions: the byte-wise stripos() does not fold
        // the case of multi-byte characters such as "Ä" / "ä".
        if (\mb_stripos($str, $search, 0, 'UTF-8') === 0) {
            $searchLength = (int) \mb_strlen($search, 'UTF-8');

            return $replacement . ((string) \mb_substr($str, $searchLength, null, 'UTF-8'));
        }

        return $str;
    }
5966
5967
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - case-insensitive, also for non-ASCII UTF-8 characters
     * - an empty $search appends $replacement to $str
     * - if $str does not end with $search (including the case that $search
     *   is longer than $str), $str is returned unchanged
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        $searchLength = (int) \mb_strlen($search, 'UTF-8');

        // Take at most $searchLength characters from the end; a shorter $str
        // simply yields the whole string, so no out-of-range offset can occur.
        $tail = (string) \mb_substr($str, -$searchLength, null, 'UTF-8');

        if (\mb_stripos($tail, $search, 0, 'UTF-8') === 0) {
            return ((string) \mb_substr($str, 0, -$searchLength, 'UTF-8')) . $replacement;
        }

        return $str;
    }
5998
5999
    /**
     * Case-insensitive check whether $haystack begins with $needle.
     *
     * An empty $haystack or an empty $needle always yields false.
     *
     * @param string $haystack <p>The string to inspect.</p>
     * @param string $needle   <p>The prefix to look for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // A prefix match means the first case-insensitive hit is at index 0.
        $firstHit = self::stripos($haystack, $needle);

        return $firstHit === 0;
    }
6015
6016
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive (each candidate is checked via self::str_istarts_with())
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // Iterate by value: the list is only read, so a by-reference loop
        // variable is unnecessary and would leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6044
6045
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - the separator is searched case-insensitively
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below,
        // otherwise the offset and the substring would use different units.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6079
6080
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - the separator is searched case-insensitively
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below,
        // otherwise the offset and the substring would use different units.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6114
6115
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - the separator is searched case-insensitively
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below,
        // otherwise the offset and the substring would use different units.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6141
6142
    /**
     * Return everything in front of the last (case-insensitive) occurrence
     * of the separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strripos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        // fast path: call the "mb_" functions directly
        $position = \mb_strripos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
6173
6174
    /**
     * Case-insensitive variant: return the part of the string starting at
     * (or, with "$beforeNeedle", in front of) the first occurrence of "$needle".
     *
     * The work is delegated to self::stristr(); its "false" result is turned
     * into an empty string. Empty input also yields an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6210
6211
    /**
     * Case-insensitive variant: return the part of the string starting at
     * (or, with "$beforeNeedle", in front of) the last occurrence of "$needle".
     *
     * The work is delegated to self::strrichr(); its "false" result is turned
     * into an empty string. Empty input also yields an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6242
6243
    /**
     * Fetch the trailing $n characters of the string.
     *
     * An empty string or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // fast path: call the "mb_" function directly
        return (string) \mb_substr($str, -$n);
    }
6266
6267
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged.
     * Otherwise the string is cut so that the kept part plus $strAddOn is
     * $length characters long. If $strAddOn alone is longer than $length,
     * nothing of $str is kept and only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length: it would cut from the end of the
            // string instead and could return more than $length characters.
            $keep = \max(0, $length - (int) \mb_strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string itself.
        $keep = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6304
6305
    /**
     * Shorten a string to at most $length characters, preferring to cut at
     * a space so that no word is split.
     *
     * - a string that already fits is returned unchanged
     * - if the character at position $length - 1 is a space, the string is
     *   cut right before it
     * - otherwise the last (partial) word of the first $length characters is
     *   dropped; if nothing would remain, $length - 1 characters are kept
     * - $strAddOn is appended to every shortened result
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        // the UTF-8 fast path calls the "mb_" functions directly
        $isUtf8 = $encoding === 'UTF-8';

        $totalLength = $isUtf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($totalLength <= $length) {
            return $str;
        }

        $boundaryChar = $isUtf8
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);
        if ($boundaryChar === ' ') {
            $head = $isUtf8
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $strAddOn;
        }

        if ($isUtf8) {
            $truncated = \mb_substr($str, 0, $length);
        } else {
            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return '' . $strAddOn;
            }
        }

        // drop the last, possibly incomplete, word
        $words = \explode(' ', $truncated);
        \array_pop($words);
        $withoutLastWord = \implode(' ', $words);

        if ($withoutLastWord !== '') {
            return $withoutLastWord . $strAddOn;
        }

        // there was only a single word: fall back to a hard cut
        $head = $isUtf8
            ? \mb_substr($truncated, 0, $length - 1)
            : self::substr($truncated, 0, $length - 1, $encoding);

        return ((string) $head) . $strAddOn;
    }
6369
6370
    /**
     * Collect the characters that $str and $otherStr share at the start,
     * i.e. their longest common prefix.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // the prefix cannot be longer than the shorter of both strings
            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            $pos = 0;
            while ($pos < $limit) {
                $current = self::substr($str, $pos, 1, $encoding);
                if ($current === false || $current !== self::substr($otherStr, $pos, 1, $encoding)) {
                    break;
                }

                $prefix .= $current;
                ++$pos;
            }

            return $prefix;
        }

        // fast path: call the "mb_" functions directly
        $limit = (int) \min(
            \mb_strlen($str),
            \mb_strlen($otherStr)
        );

        $pos = 0;
        while ($pos < $limit) {
            $current = \mb_substr($str, $pos, 1);
            if ($current === false || $current !== \mb_substr($otherStr, $pos, 1)) {
                break;
            }

            $prefix .= $current;
            ++$pos;
        }

        return $prefix;
    }
6428
6429
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest common substring, taken from $str
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // the UTF-8 fast path calls the "mb_" functions directly
        $isUtf8 = $encoding === 'UTF-8';

        // Split both strings into characters once, instead of extracting the
        // same characters again in every iteration of the nested loop.
        $strChars = [];
        for ($i = 0; $i < $strLength; ++$i) {
            $strChars[] = $isUtf8
                ? \mb_substr($str, $i, 1)
                : self::substr($str, $i, 1, $encoding);
        }

        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[] = $isUtf8
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based position in $str at which the best match ends

        // Every cell only depends on the previous row, so two rows are
        // enough instead of the full ($strLength + 1) x ($otherLength + 1) table.
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $strChars[$i - 1];
            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    $run = $previousRow[$j - 1] + 1;

                    // strictly greater: on ties the first match wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6513
6514
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are walked backwards, character by character, until the
     * first position at which they differ.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shared ending of both strings ("" if there is none)
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str, $encoding),
                \mb_strlen($otherStr, $encoding)
            );

            // negative offsets address characters counted from the end
            for ($offset = -1; $offset >= -$limit; --$offset) {
                $current = \mb_substr($str, $offset, 1);

                if ($current === false || $current !== \mb_substr($otherStr, $offset, 1)) {
                    break;
                }

                $suffix = $current . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str, $encoding),
            self::strlen($otherStr, $encoding)
        );

        for ($offset = -1; $offset >= -$limit; --$offset) {
            $current = self::substr($str, $offset, 1, $encoding);

            if ($current === false || $current !== self::substr($otherStr, $offset, 1, $encoding)) {
                break;
            }

            $suffix = $current . $suffix;
        }

        return $suffix;
    }
6575
6576
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and gets the "u" modifier, so
     * it has to be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 or false otherwise
        return \preg_match($regex, $str) === 1;
    }
6588
6589
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        // -1 is the last character, so -$length is still a valid index,
        // whereas the highest valid non-negative index is $length - 1
        return $offset < 0
            ? \abs($offset) <= $length
            : $offset < $length;
    }
6611
6612
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check has to measure the string in the same encoding
        // that is used to fetch the character afterwards.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6641
6642
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * The input is returned unchanged if $pad_length is 0, if $pad_string is
     * empty or if the input already has more than $pad_length characters.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of "left", "right", "both"
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // Choose the length / prefix primitives once; everything below is
        // shared between the plain "mb_*" path and the generic encoding path.
        if ($encoding === 'UTF-8') {
            $lengthOf = static function (string $input): int {
                return (int) \mb_strlen($input);
            };
            $firstChars = static function (string $input, int $count): string {
                return (string) \mb_substr($input, 0, $count);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $lengthOf = static function (string $input) use ($encoding): int {
                return (int) self::strlen($input, $encoding);
            };
            $firstChars = static function (string $input, int $count) use ($encoding): string {
                return (string) self::substr($input, 0, $count, $encoding);
            };
        }

        $str_length = $lengthOf($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        // number of characters that have to be added in total
        $diff = $pad_length - $str_length;

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                $leftCount = $diff;
                $rightCount = 0;

                break;

            case \STR_PAD_BOTH:
                // an odd remainder goes to the right-hand side
                $leftCount = (int) \floor($diff / 2);
                $rightCount = (int) \ceil($diff / 2);

                break;

            case \STR_PAD_RIGHT:
            default:
                $leftCount = 0;
                $rightCount = $diff;
        }

        // never divide by zero, even if the pad string could not be measured
        $ps_length = \max(1, $lengthOf($pad_string));

        // Repeats the pad string just often enough and cuts it to $count characters.
        $buildPadding = static function (int $count) use ($pad_string, $ps_length, $firstChars): string {
            if ($count <= 0) {
                return '';
            }

            return $firstChars(
                \str_repeat($pad_string, (int) \ceil($count / $ps_length)),
                $count
            );
        };

        return $buildPadding($leftCount) . $str . $buildPadding($rightCount);
    }
6804
6805
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Shortcut for str_pad() with STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6824
6825
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Shortcut for str_pad() with STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6844
6845
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Shortcut for str_pad() with STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6864
6865
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6889
6890
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /** @psalm-suppress PossiblyNullArgument */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6928
6929
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * An empty $search never matches as a prefix; in that case $replacement
     * is appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers the empty input: '' . $replacement
            return $str . $replacement;
        }

        $prefixLength = \strlen($search);

        // byte-wise prefix comparison; an input shorter than $search can never match
        if (\strncmp($str, $search, $prefixLength) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefixLength);
    }
6960
6961
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * An empty $search never matches as a suffix; in that case $replacement
     * is appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers the empty input: '' . $replacement
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // A search term longer than the input can never be its suffix. Bailing out
        // here also avoids handing an out-of-range offset to a string function.
        if ($searchLength > \strlen($str)) {
            return $str;
        }

        if (\substr($str, -$searchLength) === $search) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6992
6993
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged if the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $position = self::strpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $position, (int) self::strlen($search));
    }
7015
7016
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged if the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strrpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $position, (int) self::strlen($search));
    }
7040
7041
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // range(0, -1) would yield [0, -1] instead of an empty list
        if ($str === '') {
            return '';
        }

        // init
        $shuffledStr = '';

        if ($encoding === 'UTF-8') {
            $indexes = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($indexes);

            // iterate by value: nothing is written back into $indexes
            foreach ($indexes as $index) {
                $tmpSubStr = \mb_substr($str, $index, 1);
                if ($tmpSubStr !== false) {
                    $shuffledStr .= $tmpSubStr;
                }
            }

            return $shuffledStr;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $indexes = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        foreach ($indexes as $index) {
            $tmpSubStr = self::substr($str, $index, 1, $encoding);
            if ($tmpSubStr !== false) {
                $shuffledStr .= $tmpSubStr;
            }
        }

        return $shuffledStr;
    }
7087
7088
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $start or $end is negative, it is computed
     * from the end of the string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring ("" if $end does not lie behind $start).</p>
     *                      <p>The result of the underlying substr call is passed through, so
     *                      <b>FALSE</b> is possible if that call fails.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // no end given: everything from $start onwards
        if ($end === null) {
            if ($isUtf8) {
                return \mb_substr($str, $start, (int) \mb_strlen($str));
            }

            return self::substr($str, $start, (int) self::strlen($str, $encoding), $encoding);
        }

        // Turn negative indexes into absolute positions first, so that the
        // length below is always "end position - start position".
        if ($start < 0 || $end < 0) {
            $strLength = $isUtf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            if ($start < 0) {
                $start = (int) \max(0, $strLength + $start);
            }

            if ($end < 0) {
                $end = $strLength + $end;
            }
        }

        // empty (or inverted) range; a negative length must never reach
        // substr, where it would mean "omit characters from the end"
        if ($end <= $start) {
            return '';
        }

        if ($isUtf8) {
            return \mb_substr($str, $start, $end - $start);
        }

        return self::substr($str, $start, $end - $start, $encoding);
    }
7137
7138
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes become underscores, every single digit is surrounded by
     * underscores, every upper-case letter is lower-cased and prefixed with
     * an underscore, whitespace runs become one underscore and repeated
     * underscores are collapsed. Leading and trailing underscores are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // digits and upper-case letters only; a "|" inside a character
            // class is a literal pipe, not an alternation
            '/([\d\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // the match is a plain digit: isolate it with underscores
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/u',        // convert spaces to "_"
                '/^\s+|\s+$/u',  // trim leading & trailing spaces
                '/_+/',         // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7202
7203
    /**
     * Sort all characters according to code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicate values
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7228
7229
    /**
     * alias for "UTF8::str_split()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::str_split()
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
7247
7248
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings ([] if $limit is 0 or if splitting fails)
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() reports failure with false, which must not leave a
            // method that promises an array
            if ($parts === false) {
                return [];
            }

            if ($limit >= 0) {
                // keep only the first $limit pieces
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // One extra piece is requested so that the unsplit remainder ends up
        // in its own element and can be dropped again below.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): the pattern is quoted here, i.e. matched literally, while
        // mb_split() above interprets it as a regular expression - confirm intent.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7309
7310
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle always
     * matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // every string starts with the empty string; answering here also avoids
        // the "Empty needle" warning that strpos() raises before PHP 8
        if ($needle === '') {
            return true;
        }

        // compare only the leading bytes instead of searching the whole haystack
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7322
7323
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: the scan is read-only, and a by-reference loop
        // would leave a dangling reference to the last element behind
        foreach ($substrings as $substring) {
            // the cast keeps scalar non-string entries (e.g. integers) from
            // raising a TypeError under strict_types
            if (self::str_starts_with($str, (string) $substring)) {
                return true;
            }
        }

        return false;
    }
7351
7352
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // skip the separator itself
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper here as well, like the sibling
        // "str_substr_*_separator" methods do, so that offset, length and slice
        // are all computed by the same encoding-aware helpers
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7391
7392
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $lastPos = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($lastPos === false) {
            return '';
        }

        if ($isUtf8) {
            // start right behind the separator
            $start = $lastPos + (int) \mb_strlen($separator);

            return (string) \mb_substr($str, $start);
        }

        $start = $lastPos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
7431
7432
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $firstPos = $isUtf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($firstPos === false) {
            return '';
        }

        // everything up to (but excluding) the separator
        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $firstPos);
        }

        return (string) self::substr($str, 0, $firstPos, $encoding);
    }
7475
7476
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $lastPos = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($lastPos === false) {
            return '';
        }

        // everything up to (but excluding) the last separator
        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $lastPos);
        }

        // the encoding name is normalized only for the final slice
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $lastPos, $encoding);
    }
7518
7519
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * Without "$beforeNeedle" the result starts with the needle itself. An
     * empty string is returned if $str or $needle is empty or if the needle is
     * not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // passing "false" explicitly is the same as omitting the argument
            $found = \mb_strstr($str, $needle, $beforeNeedle);
        } else {
            $found = self::strstr($str, $needle, $beforeNeedle, $encoding);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
7563
7564
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * Without "$beforeNeedle" the result starts with the needle itself. An
     * empty string is returned if $str or $needle is empty or if the needle is
     * not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // passing "false" explicitly is the same as omitting the argument
            $found = \mb_strrchr($str, $needle, $beforeNeedle);
        } else {
            $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
7608
7609
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to put in front of and behind $str.</p>
     *
     * @return string $str with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7621
7622
    /**
     * Returns the string with the first letter of each word capitalized and
     * the rest of each word lower-cased. A "word" is any run of non-whitespace
     * characters. Words listed in $ignore are left exactly as they are.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain mb_* functions are enough unless language-specific rules
        // or length preservation were requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
            $word = $match[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($useMbFunction !== true) {
                $lowered = self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );

                return self::ucfirst(
                    $lowered,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper(\mb_substr($word, 0, 1))
                       . \mb_strtolower(\mb_substr($word, 1));
            }

            return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
                   . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
        };

        return (string) \preg_replace_callback('/([\S]+)/u', $titleizeWord, $str);
    }
7697
7698
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * The string is processed in several regex passes: a main pass that
     * classifies every word, followed by passes that re-capitalize "small
     * words" at the start / end of the title and inside hyphenated compounds.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // NOTE(review): the entries of $ignore are joined into the regular expressions
        // below without escaping, exactly like the built-in regex fragments - confirm that
        // callers never pass regex meta characters they do not intend as such.
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lowercase letter is lower-cased first, so that
        // the word classification below has something to work with
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7867
7868
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of $str are read as one big-endian number and written in base
     * 2 without leading zeros. An empty or all-zero input yields "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        $value = \unpack('H*', $str);
        if ($value === false || $value[1] === '') {
            return '0';
        }

        // Convert digit by digit: base_convert() works on a native number and
        // loses precision as soon as the input is longer than a few bytes.
        $bits = '';
        foreach (\str_split($value[1]) as $hexDigit) {
            $bits .= \sprintf('%04b', \hexdec($hexDigit));
        }

        // drop the padding zeros, but keep a single "0" for an all-zero value
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7881
7882
    /**
     * Splits a string into its lines.
     *
     * A run of one or two consecutive "\r" / "\n" characters counts as a
     * single line break.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "nothing to split" and for a failed split
        $emptyResult = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $emptyResult;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split('[\r\n]{1,2}', $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $emptyResult;
        }

        $needsReduction = $removeShortValues !== null || $removeEmptyValues !== false;
        if (!$needsReduction) {
            return $lines;
        }

        return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
    }
7920
7921
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result holds
     * both the matched words and the text between them.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // result for "nothing to split" and for a failed split
        $emptyResult = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $emptyResult;
        }

        $charList = self::rxClass($charList, '\pL');

        $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $emptyResult;
        }

        if ($removeShortValues === null && $removeEmptyValues === false) {
            return $parts;
        }

        $reduced = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

        // make sure every remaining entry is a string
        foreach (\array_keys($reduced) as $key) {
            $reduced[$key] = (string) $reduced[$key];
        }

        return $reduced;
    }
7968
7969
    /**
     * Alias for "UTF8::to_ascii()".
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str     <p>Passed through unchanged to UTF8::to_ascii().</p>
     * @param string $unknown [optional] <p>Passed through unchanged. Default: '?'</p>
     * @param bool   $strict  [optional] <p>Passed through unchanged. Default: false</p>
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7984
7985
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is already longer than $length, nothing of $str
     * is kept and only the substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the substring. Clamp at 0: a negative length
                // would make mb_substr() cut from the END of the string and
                // return far more than requested.
                $length = \max(0, $length - (int) \mb_strlen($substring));

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $length) . $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 branch above
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
            (string) self::substr(
                $str,
                0,
                $length,
                $encoding
            )
       ) . $substring;
    }
8042
8043
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * For an empty $str, a non-positive $length, or a $substring that leaves
     * no room for any text, only $substring is returned.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return '';
            }

            // a space exactly at the cut position means no word was split
            $nextSpacePos = self::strpos($str, ' ', $length - 1, $encoding);
            if ($nextSpacePos !== $length) {
                // fall back to the last space inside the truncated part
                $lastSpacePos = self::strrpos($truncated, ' ', 0, $encoding);

                $cutAtLastSpace = $lastSpacePos !== false
                                  || ($nextSpacePos !== false && $ignoreDoNotSplitWordsForOneWord === false);

                if ($cutAtLastSpace) {
                    // without a space inside the cut, "(int) false" is 0 and nothing is kept
                    $truncated = (string) self::substr($truncated, 0, (int) $lastSpacePos, $encoding);
                }
            }

            return $truncated . $substring;
        }

        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        $truncated = \mb_substr($str, 0, $length);
        if ($truncated === false) {
            return '';
        }

        // a space exactly at the cut position means no word was split
        $nextSpacePos = \mb_strpos($str, ' ', $length - 1);
        if ($nextSpacePos !== $length) {
            // fall back to the last space inside the truncated part
            $lastSpacePos = \mb_strrpos($truncated, ' ', 0);

            $cutAtLastSpace = $lastSpacePos !== false
                              || ($nextSpacePos !== false && $ignoreDoNotSplitWordsForOneWord === false);

            if ($cutAtLastSpace) {
                // without a space inside the cut, "(int) false" is 0 and nothing is kept
                $truncated = (string) \mb_substr($truncated, 0, (int) $lastSpacePos);
            }
        }

        return $truncated . $substring;
    }
8137
8138
    /**
8139
     * Returns a lowercase and trimmed string separated by underscores.
8140
     * Underscores are inserted before uppercase characters (with the exception
8141
     * of the first character of the string), and in place of spaces as well as
8142
     * dashes.
8143
     *
8144
     * @param string $str
8145
     *
8146
     * @return string the underscored string
8147
     */
8148 16
    public static function str_underscored(string $str): string
    {
        // All of the work is delegated to str_delimit(), using "_" as delimiter.
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8152
8153
    /**
8154
     * Returns an UpperCamelCase version of the supplied string. It trims
8155
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
8156
     * and underscores, and removes spaces, dashes, underscores.
8157
     *
8158
     * @param string      $str                   <p>The input string.</p>
8159
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
8160
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
8161
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
8162
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
8163
     *
8164
     * @return string string in UpperCamelCase
8165
     */
8166 13
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Step 1: camelize; only $str and $encoding are forwarded to this step.
        $camelized = self::str_camelize($str, $encoding);

        // Step 2: upper-case the first character; the remaining options apply here.
        return self::ucfirst($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }
8175
8176
    /**
8177
     * alias for "UTF8::ucfirst()"
8178
     *
8179
     * @see UTF8::ucfirst()
8180
     *
8181
     * @param string      $str
8182
     * @param string      $encoding
8183
     * @param bool        $cleanUtf8
8184
     * @param string|null $lang
8185
     * @param bool        $tryToKeepStringLength
8186
     *
8187
     * @return string
8188
     */
8189 39
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Pure alias: every argument is passed through to ucfirst() unchanged.
        $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $upperFirst;
    }
8198
8199
    /**
8200
     * Counts number of words in the UTF-8 string.
8201
     *
8202
     * @param string $str      <p>The input string.</p>
8203
     * @param int    $format   [optional] <p>
8204
     *                         <strong>0</strong> => return a number of words (default)<br>
8205
     *                         <strong>1</strong> => return an array of words<br>
8206
     *                         <strong>2</strong> => return an array of words with word-offset as key
8207
     *                         </p>
8208
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
8209
     *
8210
     * @return int|string[] The number of words in the string
8211
     */
8212 2
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        // The code below reads the words from the odd indexes of this list and
        // treats the even indexes as the text in between
        // (presumably the layout produced by str_to_words() - verify there).
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // Any format other than 1 or 2: only the amount of words is returned.
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            // plain list of words
            $index = 1;
            while ($index < $partCount) {
                $words[] = $parts[$index];
                $index += 2;
            }

            return $words;
        }

        // format 2: key each word by its character offset
        $position = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $partCount; $index += 2) {
            $word = $parts[$index];
            $words[$position] = $word;

            // advance past the word itself and the part that follows it
            $position += (int) self::strlen($word) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
8236
8237
    /**
8238
     * Case-insensitive string comparison.
8239
     *
8240
     * INFO: Case-insensitive version of UTF8::strcmp()
8241
     *
8242
     * @param string $str1     <p>The first string.</p>
8243
     * @param string $str2     <p>The second string.</p>
8244
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8245
     *
8246
     * @return int
8247
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8248
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
8249
     *             <strong>0</strong> if they are equal
8250
     */
8251 23
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical options, then compare them
        // with the case-sensitive strcmp() of this class.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8258
8259
    /**
8260
     * alias for "UTF8::strstr()"
8261
     *
8262
     * @see UTF8::strstr()
8263
     *
8264
     * @param string $haystack
8265
     * @param string $needle
8266
     * @param bool   $before_needle
8267
     * @param string $encoding
8268
     * @param bool   $cleanUtf8
8269
     *
8270
     * @return false|string
8271
     */
8272 2
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // Pure alias: every argument is passed through to strstr() unchanged.
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8281
8282
    /**
8283
     * Case-sensitive string comparison.
8284
     *
8285
     * @param string $str1 <p>The first string.</p>
8286
     * @param string $str2 <p>The second string.</p>
8287
     *
8288
     * @return int
8289
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8290
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8291
     *             <strong>0</strong> if they are equal
8292
     */
8293 29
    public static function strcmp(string $str1, string $str2): int
    {
        // Byte-identical strings are equal; no normalization needed.
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the NFD (canonically decomposed) forms, so that a precomposed
        // character and its base + combining-mark spelling compare as equal.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() does not return a string on failure (e.g. for
        // invalid UTF-8). Under strict_types that value would make \strcmp()
        // throw a TypeError, so the raw input is compared instead.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8300
8301
    /**
8302
     * Find length of initial segment not matching mask.
8303
     *
8304
     * @param string $str
8305
     * @param string $charList
8306
     * @param int    $offset
8307
     * @param int    $length
8308
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8309
     *
8310
     * @return int
8311
     */
8312 12
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // With an empty mask the result is the length of the complete string.
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        // Restrict the search to the requested window, if any.
        $hasWindow = $offset !== null || $length !== null;
        if ($hasWindow) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                $window = self::substr($str, $start, $length, $encoding);
            } elseif ($length !== null) {
                $window = \mb_substr($str, $start, $length);
            } else {
                $window = \mb_substr($str, $start);
            }

            if ($window === false) {
                return 0;
            }

            $str = $window;
        }

        if ($str === '') {
            return 0;
        }

        // Lazily capture everything in front of the first character of the mask.
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // no character of the mask occurs: the whole string counts
            return (int) self::strlen($str, $encoding);
        }

        $segmentLength = self::strlen($found[1], $encoding);

        return $segmentLength === false ? 0 : $segmentLength;
    }
8362
8363
    /**
8364
     * alias for "UTF8::stristr()"
8365
     *
8366
     * @see UTF8::stristr()
8367
     *
8368
     * @param string $haystack
8369
     * @param string $needle
8370
     * @param bool   $before_needle
8371
     * @param string $encoding
8372
     * @param bool   $cleanUtf8
8373
     *
8374
     * @return false|string
8375
     */
8376 1
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // Pure alias: every argument is passed through to stristr() unchanged.
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8385
8386
    /**
8387
     * Create a UTF-8 string from code points.
8388
     *
8389
     * INFO: opposite to UTF8::codepoints()
8390
     *
8391
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
8392
     *
8393
     * @return string UTF-8 encoded string
8394
     */
8395 4
    public static function string(array $array): string
    {
        // Convert every entry with chr() of this class, then glue the
        // resulting pieces together in their original order.
        $chars = [];
        foreach ($array as $key => $codePoint) {
            $chars[$key] = self::chr($codePoint);
        }

        return \implode('', $chars);
    }
8408
8409
    /**
8410
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
8411
     *
8412
     * @param string $str <p>The input string.</p>
8413
     *
8414
     * @return bool
8415
     *              <strong>true</strong> if the string has BOM at the start,<br>
8416
     *              <strong>false</strong> otherwise
8417
     */
8418 6
    public static function string_has_bom(string $str): bool
    {
        // Only the keys of self::$BOM (the BOM byte sequences) are needed here.
        // Iterating over the keys avoids the former by-reference loop over the
        // shared static array, whose loop value was never used.
        foreach (\array_keys(self::$BOM) as $bomString) {
            // position 0 means: the string starts with this BOM
            if (\strpos($str, $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8429
8430
    /**
8431
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
8432
     *
8433
     * @see http://php.net/manual/en/function.strip-tags.php
8434
     *
8435
     * @param string $str            <p>
8436
     *                               The input string.
8437
     *                               </p>
8438
     * @param string $allowable_tags [optional] <p>
8439
     *                               You can use the optional second parameter to specify tags which should
8440
     *                               not be stripped.
8441
     *                               </p>
8442
     *                               <p>
8443
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
8444
     *                               can not be changed with allowable_tags.
8445
     *                               </p>
8446
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
8447
     *
8448
     * @return string the stripped string
8449
     */
8450 4
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        // Optionally run the input through clean() before stripping.
        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        // The native function is called without a second argument when no
        // allowed tags were given.
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8466
8467
    /**
8468
     * Strip all whitespace characters. This includes tabs and newline
8469
     * characters, as well as multibyte whitespace such as the thin space
8470
     * and ideographic space.
8471
     *
8472
     * @param string $str
8473
     *
8474
     * @return string
8475
     */
8476 36
    public static function strip_whitespace(string $str): string
    {
        if ($str !== '') {
            // Remove every run of [[:space:]] characters; the pattern runs in
            // UTF-8 mode (/u). A null result from preg_replace() is cast to ''.
            $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

            return (string) $stripped;
        }

        return '';
    }
8484
8485
    /**
8486
     * Finds position of first occurrence of a string within another, case insensitive.
8487
     *
8488
     * @see http://php.net/manual/en/function.mb-stripos.php
8489
     *
8490
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
8491
     * @param string $needle    <p>The string to find in haystack.</p>
8492
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
8493
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8494
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8495
     *
8496
     * @return false|int
8497
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
8498
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
8499
     */
8500 24
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $hasMbstring = self::$SUPPORT['mbstring'] === true;

        // fast path: mbstring with the literal default encoding
        if ($hasMbstring && $encoding === 'UTF-8') {
            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($hasMbstring) {
            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        $useGrapheme = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
            && $offset >= 0 // grapheme_stripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;

        if ($useGrapheme) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8560
8561
    /**
8562
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
8563
     *
8564
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
8565
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
8566
     * @param bool   $before_needle [optional] <p>
8567
     *                              If <b>TRUE</b>, it returns the part of the
8568
     *                              haystack before the first occurrence of the needle (excluding the needle).
8569
     *                              </p>
8570
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
8571
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
8572
     *
8573
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
8574
     */
8575 12
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if cleaning removed all of it.
        // The comparison must be strict: a loose "!$needle" is also true for
        // the needle "0", which then returned the whole haystack by mistake.
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // mbstring is known to be unavailable from here on (the branch above
        // always returns), so other encodings cannot be handled properly.
        if ($encoding !== 'UTF-8') {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything before the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // everything from the needle on: skip as many characters as were captured
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8644
8645
    /**
8646
     * Get the string length, not the byte-length!
8647
     *
8648
     * @see     http://php.net/manual/en/function.mb-strlen.php
8649
     *
8650
     * @param string $str       <p>The string being checked for length.</p>
8651
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8652
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8653
     *
8654
     * @return false|int
8655
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
8656
     *                   $encoding.
8657
     *                   (One multi-byte character counted as +1).
8658
     *                   <br>
8659
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
8660
     *                   chars.
8661
     */
8662 173
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8 ? \mb_strlen($str) : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        // Warn when neither mbstring nor iconv can deal with the encoding.
        $cannotHandleEncoding = !$isUtf8
            && self::$SUPPORT['mbstring'] === false
            && self::$SUPPORT['iconv'] === false;
        if ($cannotHandleEncoding) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any single character"
        //

        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        // no match at all for a non-empty string is reported as failure
        return $charCount === 0 ? false : $charCount;
    }
8759
8760
    /**
8761
     * Get string length in byte.
8762
     *
8763
     * @param string $str
8764
     *
8765
     * @return int
8766
     */
8767
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit
        // encoding; otherwise the plain strlen() is used.
        $isOverloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        return $isOverloaded ? \mb_strlen($str, 'CP850') : \strlen($str);
    }
8780
8781
    /**
8782
     * Case insensitive string comparisons using a "natural order" algorithm.
8783
     *
8784
     * INFO: natural order version of UTF8::strcasecmp()
8785
     *
8786
     * @param string $str1     <p>The first string.</p>
8787
     * @param string $str2     <p>The second string.</p>
8788
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8789
     *
8790
     * @return int
8791
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8792
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8793
     *             <strong>0</strong> if they are equal
8794
     */
8795 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical options, then compare them
        // with the natural-order strnatcmp() of this class.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8802
8803
    /**
8804
     * String comparisons using a "natural order" algorithm
8805
     *
8806
     * INFO: natural order version of UTF8::strcmp()
8807
     *
8808
     * @see  http://php.net/manual/en/function.strnatcmp.php
8809
     *
8810
     * @param string $str1 <p>The first string.</p>
8811
     * @param string $str2 <p>The second string.</p>
8812
     *
8813
     * @return int
8814
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8815
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
8816
     *             <strong>0</strong> if they are equal
8817
     */
8818 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Identical strings are equal; skip the folding entirely.
        if ($str1 === $str2) {
            return 0;
        }

        // Fold both sides via strtonatfold() and let the native function compare.
        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
8822
8823
    /**
8824
     * Case-insensitive string comparison of the first n characters.
8825
     *
8826
     * @see  http://php.net/manual/en/function.strncasecmp.php
8827
     *
8828
     * @param string $str1     <p>The first string.</p>
8829
     * @param string $str2     <p>The second string.</p>
8830
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
8831
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8832
     *
8833
     * @return int
8834
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
8835
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
8836
     *             <strong>0</strong> if they are equal
8837
     */
8838 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical options, then compare the
        // first $len characters; note that $encoding is only used for folding.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8850
8851
    /**
8852
     * String comparison of the first n characters.
8853
     *
8854
     * @see  http://php.net/manual/en/function.strncmp.php
8855
     *
8856
     * @param string $str1     <p>The first string.</p>
8857
     * @param string $str2     <p>The second string.</p>
8858
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
8859
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8860
     *
8861
     * @return int
8862
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
8863
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
8864
     *             <strong>0</strong> if they are equal
8865
     */
8866 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both operands down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $prefix1 = (string) self::substr($str1, 0, $len, $encoding);
            $prefix2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $prefix1 = (string) \mb_substr($str1, 0, $len);
            $prefix2 = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare what is left.
        return self::strcmp($prefix1, $prefix2);
    }
8886
8887
    /**
8888
     * Search a string for any of a set of characters.
8889
     *
8890
     * @see  http://php.net/manual/en/function.strpbrk.php
8891
     *
8892
     * @param string $haystack  <p>The string where char_list is looked for.</p>
8893
     * @param string $char_list <p>This parameter is case sensitive.</p>
8894
     *
8895
     * @return false|string string starting from the character found, or false if it is not found
8896
     */
8897 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // Look for the first character that belongs to the given list.
        $hit = [];
        $matched = \preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $hit);
        if (!$matched) {
            return false;
        }

        // Byte position of the matched character; everything from there on is returned.
        $bytePosition = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $bytePosition);
    }
8909
8910
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup is delegated to the first usable backend, in this order: mbstring, plain byte search
     * (only for "CP850" / "ASCII"), intl, iconv, plain byte search for pure ASCII input and finally a
     * slice-and-count implementation in plain PHP.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.<br>
     *                              A negative offset is counted from the end of the string.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found (or an argument is empty) it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the string. Translate it into the equivalent
            // absolute position first, otherwise the result below would be relative to the cut-off
            // tail instead of the whole haystack (e.g. "abcdef", "e", -3 must give 4, not 1).
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position of the needle inside the remaining part of the haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position and add the skipped characters
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9057
9058
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found or an argument is empty, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $hasInput = $haystack !== '' && $needle !== '';
        if (!$hasInput) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9087
9088
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * INFO: the iconv and the plain-PHP fallback only search for the first character of $needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              true: return the part of haystack in front of the last occurrence of needle.<br>
     *                              false: return the part of haystack from the last occurrence of needle
     *                              to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift the
            // positions reported by "mb_strpos()" and "iconv_strpos()"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only
        //

        $isSingleByteCharset = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $isSingleByteCharset) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // iconv || vanilla php: both work on the first character of the needle
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9210
9211
    /**
     * Reverses characters order in the string.
     *
     * For UTF-8 the string is walked grapheme by grapheme if "intl" is available, otherwise character
     * by character via "mb_substr()". Other encodings use the "substr()" / "strlen()" of this class.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9263
9264
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * INFO: the fallback without mbstring only searches for the first character of $needle.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              true: return the part of haystack in front of the last occurrence of needle.<br>
     *                              false: return the part of haystack from the last occurrence of needle
     *                              to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift the
            // positions reported by "mb_strpos()" and "iconv_strpos()"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // vanilla php: works on the first character of the needle
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9339
9340
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * Backends are tried in this order: mbstring, plain byte search (only for "CP850" / "ASCII"), intl,
     * plain byte search for pure ASCII input and finally case folding + "strrpos()" of this class.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.<br>A non-negative int is converted via "chr()"
     *                              of this class.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $needle = ((int) $needle === $needle && $needle >= 0)
            ? (string) self::chr($needle)
            : (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // intl: "grapheme_strripos()" can't handle other encodings or a negative offset
        //

        if ($isUtf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // vanilla php: fold the case of both strings and do a case-sensitive search
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
9450
9451
    /**
     * Finds the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or an argument is empty
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $hasInput = $haystack !== '' && $needle !== '';
        if (!$hasInput) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9482
9483
    /**
     * Find position of last occurrence of a string in a string.
     *
     * Backends are tried in this order: mbstring, plain byte search (only for "CP850" / "ASCII"), intl,
     * plain byte search for pure ASCII input and finally a slice-and-count implementation in plain PHP.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the part that may be searched. The requested $encoding is passed
        // on to the helpers, so slicing and counting use the same charset as the rest of this method.
        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters, they are added to the result again below
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // drop the last |$offset| characters, positions still count from the start
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position of the last match inside the remaining part of the haystack
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // convert the byte position into a character position and add the skipped characters
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9620
9621
    /**
     * Find the byte position of the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found or an argument is empty, it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $hasInput = $haystack !== '' && $needle !== '';
        if (!$hasInput) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9651
9652
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start of the part of $str that is examined.</p>
     * @param int    $length   [optional] <p>Length of the part of $str that is examined.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return int the length of the leading run of mask characters,<br>
     *             0 if the examined part of $str or $mask is empty or nothing matches
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // only cut the input if the caller asked for a sub-string
        $needsSlice = $offset !== 0 || $length !== null;
        if ($needsSlice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        $pattern = '/^' . self::rxClass($mask) . '+/u';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9695
9696
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * Backends are tried in this order: mbstring, plain byte search (only for "CP850" / "ASCII"), intl,
     * plain byte search for pure ASCII input and finally a regular expression in UTF-8 mode.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift the
            // positions reported by "mb_strpos()" and "iconv_strpos()"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // intl: "grapheme_strstr()" can't handle other encodings
        //

        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($graphemeResult !== false) {
                return $graphemeResult;
            }
        }

        //
        // ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // vanilla php: capture everything in front of the first occurrence of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $prefix = $found[1];
        if ($before_needle) {
            return $prefix;
        }

        // skip as many characters as the prefix is long
        return self::substr($haystack, (int) self::strlen($prefix));
    }
9804
9805
    /**
     * Finds the first occurrence of a string within another, working on bytes.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found (also false if haystack or needle is empty)
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
9840
9841
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     *                <p>The folded string; an empty input is returned as an empty string.</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // replace the special case-folding characters first
        $str = self::fixStrCaseHelper($str, $lower, $full);

        $toLower = $lower === true;

        // fast path: no language rules and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // otherwise delegate to the encoding- and language-aware helpers
        return $toLower
            ? self::strtolower($str, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($str, $encoding, $cleanUtf8, $lang);
    }
9893
9894
    /**
     * Make a string lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the parameter is untyped, so normalise it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        $hasLang = $lang !== null;

        // fast path: no language rules and plain UTF-8
        if (!$hasLang && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$hasLang) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            // language specific rules need the intl transliterator
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
9959
9960
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the parameter is untyped, so normalise it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: no language rules and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, it just does not know this language
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            // language specific rules need the intl transliterator
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10025
10026
    /**
     * Translate characters or replace sub-strings.
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The characters (string), the list of sub-strings, or - if $to is
     *                              omitted - a "search => replace" map that is replaced.</p>
     * @param string|string[] $to   [optional] <p>The replacement characters (string) or sub-strings (array).</p>
     *
     * @throws \InvalidArgumentException if the search and replace lists cannot be combined
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // only strings need to be split into characters; arrays are already lists
            // NOTE(review): assumes self::str_split() is meant for string input - confirm against its definition
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }
            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // both lists must have the same size, so cut the longer one
            $countFrom = \count($from);
            $countTo = \count($to);
            if ($countFrom > $countTo) {
                $from = \array_slice($from, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $to = \array_slice($to, 0, $countFrom);
            }

            // keep $from untouched until the result is known, so that the
            // exception message can still show the original search list
            $pairs = \array_combine($from, $to);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }
            $from = $pairs;
        }

        // a plain string without replacement: remove every occurrence
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10073
10074
    /**
     * Return the width of a string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     *             <p>The width; 0 for an empty string.</p>
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // preferred: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // fallback via vanilla php, which works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the characters of the listed ranges and count how many were removed ...
        $wideCount = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        // ... each removed character counts twice, every remaining one counts once
        return (int) self::strlen($remaining, 'UTF-8') + ($wideCount * 2);
    }
10124
10125
    /**
     * Get part of a string.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str       <p>The string being checked.</p>
     * @param int      $offset    <p>The first position used in str.</p>
     * @param int|null $length    [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // hand the encoding to mbstring directly: calling self::substr() again
            // with the very same arguments would recurse without ever terminating
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ($length can not be 0 at this point, so "no length" means null)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10285
10286
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        $needsSlicing = $offset !== 0 || $length !== null;

        if ($needsSlicing && $encoding === 'UTF-8') {
            // cut the requested window out of the main string ...
            $str1 = $length === null
                ? (string) \mb_substr($str1, $offset)
                : (string) \mb_substr($str1, $offset, $length);

            // ... and limit the second string to the size of that window
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } elseif ($needsSlicing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
10339
10340
    /**
     * Count the number of substring occurrences.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring. The haystack is cut with "mb_substr()", so a negative
     *                            value is handled like its negative length.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if the haystack or the needle is empty
     *                   or the length of the haystack can not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // cut the haystack whenever an offset or any length is given, so that a
        // negative length is honoured independently of the offset
        if ($offset || $length !== null) {
            if ($length === null) {
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10421
10422
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // without function overloading the native function already counts bytes
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            // cut the requested window out of the haystack, a failed cut means "nothing left"
            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
10499
10500
    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $caseSensitive to false.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>The number of occurrences; 0 if the string or the substring is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$caseSensitive) {
                // compare the uppercase variants of both strings
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            // compare the (uppercase) case-folded variants of both strings
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10546
10547
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case-insensitive.
     *
     * The haystack is returned unchanged if it is empty, if the needle is empty
     * or if "UTF8::str_istarts_with()" does not report a match.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the leading needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // cut off as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10571
10572
    /**
     * Returns a part of a string, where offset and length are counted in bytes.
     *
     * @param string $str    <p>The string being checked.</p>
     * @param int    $offset <p>The first (byte) position used in str.</p>
     * @param int    $length [optional] <p>The maximum (byte) length of the returned string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      bytes long, <b>FALSE</b> may be returned (depends on the PHP version of
     *                      the underlying "substr()" / "mb_substr()").</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to extract
        if ($length === 0 || $str === '') {
            return '';
        }

        // no offset and no limit: the whole string is requested
        if ($length === null && $offset === 0) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // "substr()" is called with an explicit int, so "no length" is passed as a huge value
            $byteLength = $length === null ? 2147483647 : $length;

            return \substr($str, $offset, $byteLength);
        }

        // "substr()" is overloaded by mbstring, so use "mb_substr()" with an 8-bit charset
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
10603
10604
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
     *
     * The haystack is returned unchanged if it is empty, if the needle is empty
     * or if "UTF8::str_iends_with()" does not report a match.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the trailing needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the matched suffix
        $haystackLength = (int) self::strlen($haystack);
        $needleLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystackLength - $needleLength);
    }
10628
10629
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack).
     *
     * The haystack is returned unchanged if it is empty, if the needle is empty
     * or if "UTF8::str_starts_with()" does not report a match.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the leading needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // cut off as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10653
10654
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, every element is processed on its own (with the given
     * $encoding) and an array is returned. $replacement, $offset and $length may
     * then be arrays as well; they are matched to the strings by position. Missing
     * replacements count as an empty string, missing offsets as 0 and missing
     * lengths as "until the end of the string".
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // no strings, no results (padding the scalar arguments would otherwise fake one element)
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_fill(0, $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    // non-integer offsets fall back to 0
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);

                // missing offsets are 0 as well, instead of "null" (which would break "mb_substr()")
                $offset = \array_pad($offset, $num, 0);
            } else {
                $offset = \array_fill(0, $num, $offset);
            }

            // the length
            if ($length === null) {
                // "not given" must stay "not given" for each string: replace until the end of the string
                $length = \array_fill(0, $num, null);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_fill(0, $num, $length);
            }

            // recursive call, the closure makes sure that the encoding is not lost on the way
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single string can only take a single replacement: use the first one
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate "negative" and "not given" into a concrete, positive length
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into characters and splice the replacement in
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10804
10805
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack).
     *
     * The suffix itself is detected via a byte-wise comparison; only the length
     * of the part that is kept is calculated in characters of $encoding.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The haystack without the trailing needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check: does the haystack end with the needle?
        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$endsWithNeedle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10846
10847
    /**
     * Returns a case swapped version of the string.
     *
     * For "UTF-8" the swap is done character by character, so characters whose
     * upper- and lowercase forms differ in byte length (e.g. "ſ" => "S") no
     * longer shift the characters that follow them.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid byte sequences first, the case mapping below expects a valid string
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            // NOTE(review): other encodings still use the byte-wise XOR, which is only exact
            // as long as no case mapping changes the byte length of a character
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        $lower = \mb_strtolower($str);
        $upper = \mb_strtoupper($str);

        // ASCII only: every mapping keeps its byte position, so the fast byte-wise XOR is exact
        if (\preg_match('/[^\x00-\x7F]/', $str) !== 1) {
            return (string) ($lower ^ $upper ^ $str);
        }

        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // invalid UTF-8 can not be split into characters: keep the historical byte-wise behaviour
            return (string) ($lower ^ $upper ^ $str);
        }

        // If both mappings kept the number of characters, the whole-string results can be
        // used position by position (this keeps context-sensitive mappings intact).
        $lowerChars = \preg_split('//u', $lower, -1, \PREG_SPLIT_NO_EMPTY);
        $upperChars = \preg_split('//u', $upper, -1, \PREG_SPLIT_NO_EMPTY);
        $charCount = \count($chars);
        $aligned = $lowerChars !== false
                   &&
                   $upperChars !== false
                   &&
                   \count($lowerChars) === $charCount
                   &&
                   \count($upperChars) === $charCount;

        $swapped = '';
        foreach ($chars as $index => $char) {
            if ($aligned) {
                $lowerChar = $lowerChars[$index];
                $upperChar = $upperChars[$index];
            } else {
                // a mapping expanded a character (e.g. "ß" => "SS"): map each character on its own
                $lowerChar = \mb_strtolower($char);
                $upperChar = \mb_strtoupper($char);
            }

            // lowercase (or caseless) characters get the uppercase form, everything else the lowercase form
            $swapped .= $char === $lowerChar ? $upperChar : $lowerChar;
        }

        return $swapped;
    }
10874
10875
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed if a function of an extension ("mb_strlen()" for
     * "mbstring", "iconv()" for "iconv") exists although the extension itself
     * is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
10898
10899
    /**
     * Replaces every tab character of the string with $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that replace each tab. Default: 4</p>
     *
     * @return string
     *                <p>The string with all tabs replaced by spaces.</p>
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        // one run of spaces per tab, whatever the requested width is
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
10917
10918
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without $tryToKeepStringLength the work is done by
     * "mb_convert_case()"; otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the more elaborate helper
        $needsTitleize = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsTitleize) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
10955
10956
    /**
     * alias for "UTF8::to_ascii()"
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr <p>Passed on as second argument of "UTF8::to_ascii()".</p>
     * @param bool   $strict    <p>Passed on as third argument of "UTF8::to_ascii()".</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        // pure delegation, kept for backward compatibility only
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
10973
10974
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        // pure delegation, kept for backward compatibility only
        $converted = self::to_iso8859($str);

        return $converted;
    }
10989
10990
    /**
     * alias for "UTF8::to_latin1()"
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        // pure delegation, kept for backward compatibility only
        $converted = self::to_latin1($str);

        return $converted;
    }
11005
11006
    /**
     * alias for "UTF8::to_utf8()"
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        // pure delegation, kept for backward compatibility only
        $converted = self::to_utf8($str);

        return $converted;
    }
11021
11022
    /**
     * Convert a string into ASCII.
     *
     * Pure ASCII input is returned unchanged. Everything else is cleaned via
     * "UTF8::clean()", optionally transliterated via PHP-Intl and finally mapped
     * character by character: the code point is decoded from the UTF-8 bytes,
     * its high part (code point >> 8) selects a lookup table that is loaded on
     * demand (and cached for the rest of the request), its low byte selects the
     * replacement within that table. Characters without a replacement are
     * substituted by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // lookup tables, indexed by "bank" (code point >> 8), loaded lazily
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into its characters
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            // Reset the code point for every character: otherwise a character that none of
            // the branches below can decode would silently reuse the previous code point.
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // 2-byte sequence
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3-byte sequence
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4-byte sequence
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        // 5-byte sequence
                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            // 6-byte sequence
                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // 0xFE and 0xFF never start a sequence
            if ($ordC0 === 254 || $ordC0 === 255) {
                $c = $unknown;

                continue;
            }

            // the character could not be decoded
            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // the high part of the code point selects the lookup table ...
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember missing tables as empty, so they are requested only once
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            // ... and the low byte selects the entry within that table
            $newchar = $ord & 255;

            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {

                // keep for debugging
                /*
                echo "file: " . sprintf('x%02x', $bank) . "\n";
                echo "char: " . $c . "\n";
                echo "ord: " . $ord . "\n";
                echo "newchar: " . $newchar . "\n";
                echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
                echo "bank:" . $bank . "\n\n";
                 */

                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {

                // keep for debugging missing chars
                /*
                echo "file: " . sprintf('x%02x', $bank) . "\n";
                echo "char: " . $c . "\n";
                echo "ord: " . $ord . "\n";
                echo "newchar: " . $newchar . "\n";
                echo "bank:" . $bank . "\n\n";
                 */

                $c = $unknown;
            }
        }
        // break the reference, so that $c can not modify the last element by accident
        unset($c);

        return \implode('', $chars);
    }
11182
11183
    /**
     * Converts a value into a boolean, based on its string representation.
     *
     * "true", "1", "on" and "yes" are true; "false", "0", "off" and "no" as well
     * as the empty string are false (the words are matched case-insensitively).
     * Numeric strings are true if they are greater than zero. Every other
     * string is true, unless nothing but whitespace is left after trimming.
     *
     * Info: http://php.net/manual/en/filter.filters.validate.php
     *
     * @param mixed $str <p>The value, it is cast to a string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        $truthyWords = ['true', '1', 'on', 'yes'];
        $falsyWords = ['false', '0', 'off', 'no'];

        // try the value as given first, then its lowercase form
        foreach ([$value, \strtolower($value)] as $candidate) {
            if (\in_array($candidate, $truthyWords, true)) {
                return true;
            }

            if (\in_array($candidate, $falsyWords, true)) {
                return false;
            }
        }

        // numbers: only values above zero are true
        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        // anything else: true, unless only whitespace (or a bare "0") remains
        $trimmed = \trim($value);

        return $trimmed !== '' && $trimmed !== '0';
    }
11224
11225
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Only dots, hyphens, ASCII letters, ASCII digits and the characters of
     * $fallback_char are kept. Whitespace is replaced by $fallback_char, repeated
     * fallback characters are collapsed and fallback characters at the beginning
     * and at the end are removed. With an empty $fallback_char the unsafe
     * characters and the whitespace are simply removed.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char     <p>The replacement for whitespace. Default: "-"</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/u',                                           // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty fallback would result in the invalid pattern "/[]+/u" (and in an empty
        // result for every input), and there is nothing to collapse or to trim in that case.
        if ($fallback_char === '') {
            return (string) \preg_replace($patterns, $replacements, $string);
        }

        $patterns[] = '/[' . $fallback_char_escaped . ']+/u'; // 3) remove double $fallback_char's
        $replacements[] = $fallback_char;

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11260
11261
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), the keys are kept.
     * The conversion of a single string is done by "UTF8::utf8_decode()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            $converted = [];
            foreach ($str as $key => $value) {
                $converted[$key] = self::to_iso8859($value);
            }

            return $converted;
        }

        $string = (string) $str;

        return $string === '' ? '' : self::utf8_decode($string);
    }
11285
11286
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        // Latin-1 is just another name for ISO-8859-1 here
        $converted = self::to_iso8859($str);

        return $converted;
    }
11299
11300
    /**
11301
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
11302
     *
11303
     * <ul>
11304
     * <li>It decode UTF-8 codepoints and unicode escape sequences.</li>
11305
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
11306
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
11307
     * case.</li>
11308
     * </ul>
11309
     *
11310
     * @param string|string[] $str                    <p>Any string or array.</p>
11311
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
11312
     *
11313
     * @return string|string[] the UTF-8 encoded string
11314
     */
11315 37
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
11316
    {
11317 37
        if (\is_array($str) === true) {
11318 4
            foreach ($str as $k => &$v) {
11319 4
                $v = self::to_utf8($v, $decodeHtmlEntityToUtf8);
11320
            }
11321
11322 4
            return $str;
11323
        }
11324
11325 37
        $str = (string) $str;
11326 37
        if ($str === '') {
11327 6
            return $str;
11328
        }
11329
11330 37
        $max = \strlen($str);
11331 37
        $buf = '';
11332
11333 37
        for ($i = 0; $i < $max; ++$i) {
11334 37
            $c1 = $str[$i];
11335
11336 37
            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already
11337
11338 33
                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8
11339
11340 30
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11341
11342 30
                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
11343 16
                        $buf .= $c1 . $c2;
11344 16
                        ++$i;
11345
                    } else { // not valid UTF8 - convert it
11346 30
                        $buf .= self::to_utf8_convert_helper($c1);
11347
                    }
11348 33
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8
11349
11350 32
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11351 32
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11352
11353 32
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
11354 14
                        $buf .= $c1 . $c2 . $c3;
11355 14
                        $i += 2;
11356
                    } else { // not valid UTF8 - convert it
11357 32
                        $buf .= self::to_utf8_convert_helper($c1);
11358
                    }
11359 25
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8
11360
11361 25
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11362 25
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11363 25
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
11364
11365 25
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
11366 8
                        $buf .= $c1 . $c2 . $c3 . $c4;
11367 8
                        $i += 3;
11368
                    } else { // not valid UTF8 - convert it
11369 25
                        $buf .= self::to_utf8_convert_helper($c1);
11370
                    }
11371
                } else { // doesn't look like UTF8, but should be converted
11372
11373 33
                    $buf .= self::to_utf8_convert_helper($c1);
11374
                }
11375 34
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion
11376
11377 3
                $buf .= self::to_utf8_convert_helper($c1);
11378
            } else { // it doesn't need conversion
11379
11380 34
                $buf .= $c1;
11381
            }
11382
        }
11383
11384
        // decode unicode escape sequences + unicode surrogate pairs
11385 37
        $buf = \preg_replace_callback(
11386 37
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
11387
            /**
11388
             * @param array $matches
11389
             *
11390
             * @return string
11391
             */
11392
            static function (array $matches): string {
11393 9
                if (isset($matches[3])) {
11394 9
                    $cp = (int) \hexdec($matches[3]);
11395
                } else {
11396
                    // http://unicode.org/faq/utf_bom.html#utf16-4
11397
                    $cp = ((int) \hexdec($matches[1]) << 10)
11398
                          + (int) \hexdec($matches[2])
11399
                          + 0x10000
11400
                          - (0xD800 << 10)
11401
                          - 0xDC00;
11402
                }
11403
11404
                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
11405
                //
11406
                // php_utf32_utf8(unsigned char *buf, unsigned k)
11407
11408 9
                if ($cp < 0x80) {
11409 7
                    return (string) self::chr($cp);
11410
                }
11411
11412 6
                if ($cp < 0xA0) {
11413
                    /** @noinspection UnnecessaryCastingInspection */
11414
                    return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
11415
                }
11416
11417 6
                return self::decimal_to_chr($cp);
11418 37
            },
11419 37
            $buf
11420
        );
11421
11422 37
        if ($buf === null) {
11423
            return '';
11424
        }
11425
11426
        // decode UTF-8 codepoints
11427 37
        if ($decodeHtmlEntityToUtf8 === true) {
11428 2
            $buf = self::html_entity_decode($buf);
11429
        }
11430
11431 37
        return $buf;
11432
    }
11433
11434
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than the native "trim()".
     *
     * The native function could only be used for 7-bit (ASCII) input, but checking
     * for ASCII first costs more time than it would save here.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars [optional] <p>Characters to be stripped; null or an empty string strips
     *                           whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly instead of relying on truthiness: the string "0" is
        // falsy in PHP, so a plain "if ($chars)" ignored a request to strip zeros
        // and stripped whitespace instead.
        if ($chars !== null && $chars !== '') {
            // The quoted characters are placed inside a character class below.
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\s]+|[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Fallback without mbstring: same pattern, wrapped in "/" delimiters by the helper.
        return self::regex_replace($str, $pattern, '', '', '/');
    }
11467
11468
    /**
     * Returns the string with its first character converted to uppercase.
     *
     * The plain "mb_strtoupper()" is used unless a language or the
     * keep-string-length option is requested; in that case the conversion is
     * delegated to "UTF8::strtoupper()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // run the input through "UTF8::clean()" before it is split
            $str = self::clean($str);
        }

        // special casing rules are only needed for a given language or when the length must be kept
        $needsCustomUpper = $lang !== null || $tryToKeepStringLength !== false;

        if ($encoding === 'UTF-8') {
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);

            if ($needsCustomUpper) {
                $upperHead = self::strtoupper($head, $encoding, false, $lang, $tryToKeepStringLength);
            } else {
                $upperHead = \mb_strtoupper($head);
            }

            return $upperHead . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($needsCustomUpper) {
            $upperHead = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        } else {
            $upperHead = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        }

        return $upperHead . $tail;
    }
11537
11538
    /**
     * Alias of "UTF8::ucfirst()": forwards the three arguments unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
     * @param bool   $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
     *
     * @return string the value returned by "UTF8::ucfirst()"
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $firstCharUppercased = self::ucfirst($str, $encoding, $cleanUtf8);

        return $firstCharUppercased;
    }
11553
11554
    /**
     * Uppercase the first character of every word in the string.
     *
     * INFO: "mb_convert_case($str, MB_CASE_TITLE)" is not used, because MB_CASE_TITLE
     * does not only uppercase the first letter, it also lowercases all other letters.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are left untouched (compared strictly).</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // Strict comparison on purpose: "!$str" would also discard the valid input "0".
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // run the input through "UTF8::clean()" before it is split into words
            $str = self::clean($str);
        }

        // the native function is only an option when neither a charlist nor exceptions are given
        $usePhpDefaultFunctions = !(bool) ($charlist . \implode('', $exceptions));

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // empty parts (and the lone "0", which has no uppercase form) need no conversion
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference so that later use of $word cannot modify the last element
        unset($word);

        return \implode('', $words);
    }
11615
11616
    /**
     * Decode url-encoded text and html entities & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: decode as often as possible (until the string no longer changes);
     *                             false: decode exactly once.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Nothing that could be an entity, a percent escape, a "+" or a "\u" escape: skip the decoding.
        if (
            \strpos($str, '&') === false
            &&
            \strpos($str, '%') === false
            &&
            \strpos($str, '+') === false
            &&
            \strpos($str, '\u') === false
        ) {
            return self::fix_simple_utf8($str);
        }

        // Non-standard "%uXXXX" escapes are rewritten into hexadecimal html entities,
        // which the entity decoding below then resolves.
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always run at least one decoding pass. Previously the decoding was skipped
        // completely when $multi_decode was false, so the input came back still encoded.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
11680
11681
    /**
     * Return an array that maps "urlencoded" Windows-1252 bytes ("%20" ... "%FF") to UTF-8 strings.
     *
     * - "%80" maps to the euro sign, which is what byte 0x80 means in Windows-1252
     *   (it was mapped to a backtick before, duplicating "%60").
     * - The invisible characters DEL ("%7F"), no-break space ("%A0") and soft hyphen ("%AD")
     *   are written as escape sequences so that they cannot get lost in an editor.
     * - The five bytes that Windows-1252 leaves unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D)
     *   map to an empty string.
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[] 224 entries, ordered from "%20" to "%FF"
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            // 0x20 - 0x7F: ASCII
            '%20' => ' ', '%21' => '!', '%22' => '"', '%23' => '#', '%24' => '$', '%25' => '%', '%26' => '&', '%27' => "'",
            '%28' => '(', '%29' => ')', '%2A' => '*', '%2B' => '+', '%2C' => ',', '%2D' => '-', '%2E' => '.', '%2F' => '/',
            '%30' => '0', '%31' => '1', '%32' => '2', '%33' => '3', '%34' => '4', '%35' => '5', '%36' => '6', '%37' => '7',
            '%38' => '8', '%39' => '9', '%3A' => ':', '%3B' => ';', '%3C' => '<', '%3D' => '=', '%3E' => '>', '%3F' => '?',
            '%40' => '@', '%41' => 'A', '%42' => 'B', '%43' => 'C', '%44' => 'D', '%45' => 'E', '%46' => 'F', '%47' => 'G',
            '%48' => 'H', '%49' => 'I', '%4A' => 'J', '%4B' => 'K', '%4C' => 'L', '%4D' => 'M', '%4E' => 'N', '%4F' => 'O',
            '%50' => 'P', '%51' => 'Q', '%52' => 'R', '%53' => 'S', '%54' => 'T', '%55' => 'U', '%56' => 'V', '%57' => 'W',
            '%58' => 'X', '%59' => 'Y', '%5A' => 'Z', '%5B' => '[', '%5C' => '\\', '%5D' => ']', '%5E' => '^', '%5F' => '_',
            '%60' => '`', '%61' => 'a', '%62' => 'b', '%63' => 'c', '%64' => 'd', '%65' => 'e', '%66' => 'f', '%67' => 'g',
            '%68' => 'h', '%69' => 'i', '%6A' => 'j', '%6B' => 'k', '%6C' => 'l', '%6D' => 'm', '%6E' => 'n', '%6F' => 'o',
            '%70' => 'p', '%71' => 'q', '%72' => 'r', '%73' => 's', '%74' => 't', '%75' => 'u', '%76' => 'v', '%77' => 'w',
            '%78' => 'x', '%79' => 'y', '%7A' => 'z', '%7B' => '{', '%7C' => '|', '%7D' => '}', '%7E' => '~', '%7F' => "\x7F",
            // 0x80 - 0x9F: Windows-1252 specific range ('' === unassigned byte)
            '%80' => '€', '%81' => '', '%82' => '‚', '%83' => 'ƒ', '%84' => '„', '%85' => '…', '%86' => '†', '%87' => '‡',
            '%88' => 'ˆ', '%89' => '‰', '%8A' => 'Š', '%8B' => '‹', '%8C' => 'Œ', '%8D' => '', '%8E' => 'Ž', '%8F' => '',
            '%90' => '', '%91' => '‘', '%92' => '’', '%93' => '“', '%94' => '”', '%95' => '•', '%96' => '–', '%97' => '—',
            '%98' => '˜', '%99' => '™', '%9A' => 'š', '%9B' => '›', '%9C' => 'œ', '%9D' => '', '%9E' => 'ž', '%9F' => 'Ÿ',
            // 0xA0 - 0xFF: identical to ISO-8859-1
            '%A0' => "\xC2\xA0", '%A1' => '¡', '%A2' => '¢', '%A3' => '£', '%A4' => '¤', '%A5' => '¥', '%A6' => '¦', '%A7' => '§',
            '%A8' => '¨', '%A9' => '©', '%AA' => 'ª', '%AB' => '«', '%AC' => '¬', '%AD' => "\xC2\xAD", '%AE' => '®', '%AF' => '¯',
            '%B0' => '°', '%B1' => '±', '%B2' => '²', '%B3' => '³', '%B4' => '´', '%B5' => 'µ', '%B6' => '¶', '%B7' => '·',
            '%B8' => '¸', '%B9' => '¹', '%BA' => 'º', '%BB' => '»', '%BC' => '¼', '%BD' => '½', '%BE' => '¾', '%BF' => '¿',
            '%C0' => 'À', '%C1' => 'Á', '%C2' => 'Â', '%C3' => 'Ã', '%C4' => 'Ä', '%C5' => 'Å', '%C6' => 'Æ', '%C7' => 'Ç',
            '%C8' => 'È', '%C9' => 'É', '%CA' => 'Ê', '%CB' => 'Ë', '%CC' => 'Ì', '%CD' => 'Í', '%CE' => 'Î', '%CF' => 'Ï',
            '%D0' => 'Ð', '%D1' => 'Ñ', '%D2' => 'Ò', '%D3' => 'Ó', '%D4' => 'Ô', '%D5' => 'Õ', '%D6' => 'Ö', '%D7' => '×',
            '%D8' => 'Ø', '%D9' => 'Ù', '%DA' => 'Ú', '%DB' => 'Û', '%DC' => 'Ü', '%DD' => 'Ý', '%DE' => 'Þ', '%DF' => 'ß',
            '%E0' => 'à', '%E1' => 'á', '%E2' => 'â', '%E3' => 'ã', '%E4' => 'ä', '%E5' => 'å', '%E6' => 'æ', '%E7' => 'ç',
            '%E8' => 'è', '%E9' => 'é', '%EA' => 'ê', '%EB' => 'ë', '%EC' => 'ì', '%ED' => 'í', '%EE' => 'î', '%EF' => 'ï',
            '%F0' => 'ð', '%F1' => 'ñ', '%F2' => 'ò', '%F3' => 'ó', '%F4' => 'ô', '%F5' => 'õ', '%F6' => 'ö', '%F7' => '÷',
            '%F8' => 'ø', '%F9' => 'ù', '%FA' => 'ú', '%FB' => 'û', '%FC' => 'ü', '%FD' => 'ý', '%FE' => 'þ', '%FF' => 'ÿ',
        ];
    }
11917
11918
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Before the byte-wise decoding, the replacement table loaded from the data file
     * "win1252_to_utf8" is applied to the string (keys are replaced by values).
     *
     * The decoding itself works like this:
     * - a 2-byte sequence becomes one byte if its code point is below 256, otherwise "?"
     * - a 3-byte or 4-byte sequence becomes "?"
     * - a 2-byte lead byte at the very end of the input (truncated sequence) becomes "?"
     * - every other byte is copied unchanged
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars [optional] <p>If true and the decoded result is not shorter (measured with
     *                              "UTF8::strlen()") than the string after the table replacement, that
     *                              un-decoded string is returned instead.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        static $UTF8_TO_WIN1252_KEYS_CACHE = null;
        static $UTF8_TO_WIN1252_VALUES_CACHE = null;

        // build the search / replace lists only once per request
        if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lookup tables from the data files "ord" and "chr"
        // NOTE(review): presumably byte => ordinal and ordinal => byte maps; confirm against the data files
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // $i is the read position, $j the write position; the string is rewritten in place
        // ($j never gets ahead of $i) and cut to $j bytes afterwards.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte tells the length of the sequence
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // Truncated 2-byte sequence: there is no continuation byte to read,
                        // so do not index past the end of the string.
                        $str[$j] = $noCharFound;

                        break;
                    }

                    // 2-byte sequence: 5 bits from the lead byte + 6 bits from the continuation byte
                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than for 3 bytes
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte sequence (or the rest of a 4-byte sequence): not representable
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12001
12002
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * After the native "utf8_encode()" call, the replacement table loaded from the data
     * file "win1252_to_utf8" is applied (keys are replaced by values), but only if the
     * encoded string contains the byte 0xC2 at all.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        // without a 0xC2 byte there is nothing the replacement table is applied to
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        static $searchCache = null;
        static $replaceCache = null;

        // build the search / replace lists only once per request
        if ($searchCache === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
            $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($searchCache, $replaceCache, $encoded);
    }
12043
    /**
12044
     * fix -> utf8-win1252 chars
12045
     *
12046
     * @param string $str <p>The input string.</p>
12047
     *
12048
     * @return string
12049
     *
12050
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
12051
     */
12052 2
    public static function utf8_fix_win1252_chars(string $str): string
12053
    {
12054 2
        return self::fix_simple_utf8($str);
12055
    }
12056
12057
    /**
12058
     * Returns an array with all utf8 whitespace characters.
12059
     *
12060
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
12061
     *
12062
     * @author: Derek E. [email protected]
12063
     *
12064
     * @return string[]
12065
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
12066
     *                  as defined in above URL
12067
     */
12068 2
    public static function whitespace_table(): array
12069
    {
12070 2
        return self::$WHITESPACE_TABLE;
12071
    }
12072
12073
    /**
12074
     * Limit the number of words in a string.
12075
     *
12076
     * @param string $str      <p>The input string.</p>
12077
     * @param int    $limit    <p>The limit of words as integer.</p>
12078
     * @param string $strAddOn <p>Replacement for the striped string.</p>
12079
     *
12080
     * @return string
12081
     */
12082 2
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
12083
    {
12084 2
        if ($str === '' || $limit < 1) {
12085 2
            return '';
12086
        }
12087
12088 2
        \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);
12089
12090
        if (
12091 2
            !isset($matches[0])
12092
            ||
12093 2
            \mb_strlen($str) === (int) \mb_strlen($matches[0])
12094
        ) {
12095 2
            return $str;
12096
        }
12097
12098 2
        return \rtrim($matches[0]) . $strAddOn;
12099
    }
12100
12101
    /**
12102
     * Wraps a string to a given number of characters
12103
     *
12104
     * @see  http://php.net/manual/en/function.wordwrap.php
12105
     *
12106
     * @param string $str   <p>The input string.</p>
12107
     * @param int    $width [optional] <p>The column width.</p>
12108
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
12109
     * @param bool   $cut   [optional] <p>
12110
     *                      If the cut is set to true, the string is
12111
     *                      always wrapped at or before the specified width. So if you have
12112
     *                      a word that is larger than the given width, it is broken apart.
12113
     *                      </p>
12114
     *
12115
     * @return string
12116
     *                <p>The given string wrapped at the specified column.</p>
12117
     */
12118 10
    public static function wordwrap(
12119
        string $str,
12120
        int $width = 75,
12121
        string $break = "\n",
12122
        bool $cut = false
12123
    ): string {
12124 10
        if ($str === '' || $break === '') {
12125 3
            return '';
12126
        }
12127
12128 8
        $strSplit = \explode($break, $str);
12129 8
        if ($strSplit === false) {
12130
            return '';
12131
        }
12132
12133 8
        $chars = [];
12134 8
        $wordSplit = '';
12135 8
        foreach ($strSplit as $i => $iValue) {
12136 8
            if ($i) {
12137 1
                $chars[] = $break;
12138 1
                $wordSplit .= '#';
12139
            }
12140
12141 8
            foreach (self::str_split($iValue) as $c) {
12142 8
                $chars[] = $c;
12143 8
                $wordSplit .= $c === ' ' ? ' ' : '?';
12144
            }
12145
        }
12146
12147 8
        $strReturn = '';
12148 8
        $j = 0;
12149 8
        $b = $i = -1;
12150 8
        $wordSplit = \wordwrap($wordSplit, $width, '#', $cut);
12151
12152 8
        while (false !== $b = \mb_strpos($wordSplit, '#', $b + 1)) {
12153 6
            for (++$i; $i < $b; ++$i) {
12154 6
                $strReturn .= $chars[$j];
12155 6
                unset($chars[$j++]);
12156
            }
12157
12158
            if (
12159 6
                $break === $chars[$j]
12160
                ||
12161 6
                $chars[$j] === ' '
12162
            ) {
12163 3
                unset($chars[$j++]);
12164
            }
12165
12166 6
            $strReturn .= $break;
12167
        }
12168
12169 8
        return $strReturn . \implode('', $chars);
12170
    }
12171
12172
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split at "\r\n", "\r" or "\n"; every line is wrapped on its own
     * and a "\n" is appended after every line (also after the last one).
     *
     * The wrapping is done by "UTF8::wordwrap()", so the limit counts characters.
     * The native "wordwrap()" counts bytes and therefore wrapped lines with
     * multi-byte characters too early.
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width for every single line.</p>
     *
     * @return string an empty string if the input could not be split
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            $wrapped .= self::wordwrap($line, $limit) . "\n";
        }

        return $wrapped;
    }
12197
    /**
12198
     * Returns an array of Unicode White Space characters.
12199
     *
12200
     * @return string[] an array with numeric code point as key and White Space Character as value
12201
     */
12202 2
    public static function ws(): array
12203
    {
12204 2
        return self::$WHITESPACE;
12205
    }
12206
12207
    /**
12208
     * @return void
12209
     */
12210 9
    private static function initEmojiData()
12211
    {
12212 9
        if (self::$EMOJI_KEYS_CACHE === null) {
12213 1
            if (self::$EMOJI === null) {
12214 1
                self::$EMOJI = self::getData('emoji');
12215
            }
12216
12217 1
            \uksort(
12218
                self::$EMOJI,
12219
                static function (string $a, string $b): int {
12220 1
                    return \strlen($b) <=> \strlen($a);
12221 1
                }
12222
            );
12223
12224 1
            self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
12225 1
            self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);
12226
12227 1
            foreach (self::$EMOJI_KEYS_CACHE as $key) {
12228 1
                $tmpKey = \crc32($key);
12229 1
                self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $tmpKey . '_-_' . \strrev((string) $tmpKey) . '_-_8FTU_ELBATROP_-_';
12230
            }
12231
        }
12232 9
    }
12233
12234
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The common table (self::$COMMON_CASE_FOLD) is always applied; the full table
     * ("caseFolding_full" data file, loaded once and kept in a static local) is applied
     * in addition when $fullCaseFold is truthy.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase
     *                             (only a strict <strong>true</strong> selects lowercase).</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = $useLower === true;

        $commonUpper = self::$COMMON_CASE_FOLD['upper'];
        $commonLower = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? \str_replace($commonUpper, $commonLower, $str)
            : \str_replace($commonLower, $commonUpper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // loaded on the first full-fold request only
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        if ($toLower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12275
12276
    /**
     * Includes "/data/<file>.php" (relative to this class file) and returns whatever that file returns.
     *
     * No existence check is done here; see getDataIfExists() for the guarded variant.
     *
     * @param string $file <p>Base name of the data file, without directory and without ".php".</p>
     *
     * @return mixed
     */
    private static function getData(string $file)
    {
        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include \sprintf('%s/data/%s.php', __DIR__, $file);
    }
12290
12291
    /**
     * Includes "/data/<file>.php" (relative to this class file) if that path exists.
     *
     * @param string $file <p>Base name of the data file, without directory and without ".php".</p>
     *
     * @return false|mixed the value returned by the included file, or false when the path does not exist
     */
    private static function getDataIfExists(string $file)
    {
        $file = \sprintf('%s/data/%s.php', __DIR__, $file);

        if (!\file_exists($file)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $file;
    }
12309
12310
    /**
     * Checks whether mbstring function overloading of the string functions is active on the server.
     *
     * INFO: the INI directive "mbstring.func_overload" is deprecated since PHP 7.2
     *
     * @return bool true only if MB_OVERLOAD_STRING is defined and its bit is set in "mbstring.func_overload"
     */
    private static function mbstring_overloaded(): bool
    {
        // without the constant there is nothing to test the flag against
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $funcOverload = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($funcOverload & \MB_OVERLOAD_STRING);
    }
12327
12328
    /**
     * Filters a list of strings and returns the survivors as a re-indexed list.
     *
     * An element is dropped when
     *  - $removeShortValues is not null and the element's length (\mb_strlen() without an explicit
     *    encoding) is less than or equal to $removeShortValues, or
     *  - $removeEmptyValues is true and the element is empty after \trim().
     *
     * @param array    $strings
     * @param bool     $removeEmptyValues
     * @param int|null $removeShortValues <p>Maximum length that is still removed; null disables the length filter.</p>
     *
     * @return array the remaining elements in their original order, keyed 0..n-1
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        // init
        $return = [];

        // iterate by value: nothing is modified in place, so a reference would only leave a dangling alias behind
        foreach ($strings as $str) {
            if (
                $removeShortValues !== null
                &&
                \mb_strlen($str) <= $removeShortValues
            ) {
                continue;
            }

            if (
                $removeEmptyValues === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12362
12363
    /**
     * Builds a regular-expression fragment that matches any single element of $s.
     *
     * $s is split via self::str_split(); the pieces are collected into one bracketed character class
     * (seeded with $class). Pieces that cannot go into the class are added as alternatives, in which case
     * the result is a non-capturing group "(?:[...]|alt|...)". Results are memoised per "$s . $class".
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Raw content to seed the character class with (inserted unescaped).</p>
     *
     * @return string the regex fragment, "/"-delimiter safe for the escaped pieces; empty string if nothing was collected
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        $classArray = [$class];

        // iterate by value into a separate variable: the split result is a temporary (nothing to reference)
        // and the "$s" parameter must not be overwritten while looping
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal hyphen goes to the front so it cannot form a range
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape it for use inside a "/"-delimited pattern
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character of three or more bytes: added verbatim
                $classArray[0] .= $char;
            } else {
                // longer piece: cannot be a class member, becomes its own alternative
                $classArray[] = $char;
            }
        }

        // compare against '' explicitly, a plain truthiness check would skip the brackets for the class "0"
        if ($classArray[0] !== '') {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12411
12412
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names on $delimiter, upper-cases the first character of every part via self::ucfirst()
     * and joins the parts again. A part is left untouched when
     *  - it is exactly one of the known lowercase particles ("de", "van", "von", ...), or
     *  - it starts with one of the known prefixes ("al-", "d'", "mc", ...), or
     *  - $delimiter is "-" and it starts with one of the particles.
     *
     * @param string $names
     * @param string $delimiter <p>Separator between the name parts.</p>
     * @param string $encoding  <p>Encoding passed to self::strpos() for the prefix checks.</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // init
        $namesArray = \explode($delimiter, $names);

        // explode() reports an empty delimiter by returning false on PHP 7
        if ($namesArray === false) {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // beginnings that keep a part as it is; for "-" separated parts the particles count as beginnings too
        $protectedBeginnings = $delimiter === '-'
            ? \array_merge($specialCases['names'], $specialCases['prefixes'])
            : $specialCases['prefixes'];

        // write back by key instead of iterating by reference, so no dangling reference is left behind
        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            foreach ($protectedBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // first match is enough, skip this name part
                    continue 2;
                }
            }

            // NOTE(review): $encoding is not forwarded to self::ucfirst() (unchanged from before) - confirm that this is intended
            $namesArray[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $namesArray);
    }
12502
12503
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * Decomposes the string (Unicode normalization form D) and strips all non-spacing
     * marks (\p{Mn}), so that e.g. accented letters compare like their base letters.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null the folded string, an empty string if the input could not be normalized,
     *                     or null if the regular expression failed
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() does not return a string on failure; handing that result to preg_replace() is a
        // TypeError under strict_types on PHP 8, while PHP 7 silently treated it as '' - keep the '' result
        if (!\is_string($decomposed)) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12518
12519
    /**
     * Converts one input unit to UTF-8 with the help of the "ord", "chr" and "win1252_to_utf8" data tables.
     *
     * The three tables are loaded on first use. The input is looked up in self::$ORD; if the resulting
     * code has an entry in self::$WIN1252_TO_UTF8 that entry is returned, otherwise a lead byte and a
     * continuation byte are assembled with string bitwise operations.
     *
     * @param int|string $input <p>Key into self::$ORD (presumably a single byte - confirm against the callers).</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // truncate explicitly: a float with a fractional part as array offset is only implicitly
            // converted and triggers a deprecation since PHP 8.1
            $leadIndex = (int) ($ordC1 / 64);

            // "|" and "&" on two strings work byte-wise in PHP - the bitwise operators are intended here
            $cc1 = self::$CHR[$leadIndex] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12552
}
12553