Passed
Push — master ( 158cf2...6eb254 )
by Lars
03:36
created

UTF8::strpos()   F

Complexity

Conditions 27
Paths 907

Size

Total Lines 131
Code Lines 58

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 40
CRAP Score 31.5191

Importance

Changes 0
Metric Value
cc 27
eloc 58
nc 907
nop 5
dl 0
loc 131
ccs 40
cts 49
cp 0.8163
crap 31.5191
rs 0.1291
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Byte-order-mark signatures mapped to their length in bytes.
     *
     * Each raw BOM is followed by the form it takes when its bytes were read
     * as "WINDOWS-1252" characters (one such character may occupy more than
     * one byte, hence the larger lengths).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // Written as byte escapes on purpose: the literal characters are easily
        // stripped by editors/tools, which would leave an empty key that is a
        // prefix of every string.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $ENCODINGS;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ORD;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $CHR;
214
215
    /**
     * Creates an instance without doing any work.
     *
     * Every helper visible in this class is static, so there is no
     * per-instance state to prepare.
     */
    public function __construct()
    {
        // intentionally left empty
    }
221
222
    /**
     * Fetch the single character found at a given position: works like
     * $str[1], but counts characters instead of bytes.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>Zero-based character position; a negative value yields an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the character at $pos, or an empty string if there is none
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $nothingToRead = $pos < 0 || $str === '';
        if ($nothingToRead) {
            return '';
        }

        // UTF-8 goes straight to mbstring, everything else through self::substr()
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
243
244
    /**
     * Makes sure the string starts with a byte order mark.
     *
     * The value of self::bom() is prepended unless self::string_has_bom()
     * reports that a BOM is already present, in which case the input is
     * returned untouched.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string, guaranteed to carry a BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
261
262
    /**
     * Changes the case of all keys in an array.
     *
     * Keys are cast to string before conversion. If two keys become equal
     * after the conversion, the later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value
     *                         is treated as <strong>CASE_LOWER</strong></p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array an array with its keys lower or uppercased, values unchanged
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // everything that is not explicitly CASE_UPPER falls back to lower-casing
        $toLower = $case !== \CASE_UPPER;

        $return = [];
        // iterate by value: the values are only copied, so a reference is
        // neither needed nor left dangling after the loop
        foreach ($array as $key => $value) {
            $key = $toLower
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
293
294
    /**
     * Extracts the text located between two delimiters.
     *
     * The search for $start begins at $offset; the search for $end begins
     * right behind the matched $start. An empty string is returned when either
     * delimiter is missing or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // fast path: plain mbstring calls for UTF-8
        if ($encoding === 'UTF-8') {
            $startPos = \mb_strpos($str, $start, $offset);
            if ($startPos === false) {
                return '';
            }

            $from = $startPos + (int) \mb_strlen($start);
            $endPos = \mb_strpos($str, $end, $from);
            if ($endPos === false || $endPos === $from) {
                return '';
            }

            return (string) \mb_substr($str, $from, $endPos - $from);
        }

        // any other charset: same steps via the encoding-aware helpers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        $from = $startPos + (int) self::strlen($start, $encoding);
        $endPos = self::strpos($str, $end, $from, $encoding);
        if ($endPos === false || $endPos === $from) {
            return '';
        }

        return (string) self::substr($str, $from, $endPos - $from, $encoding);
    }
357
358
    /**
     * Converts a string of binary digits into the bytes it encodes.
     *
     * The digits are turned into hexadecimal via base_convert() and then
     * packed with the "H*" format. Input without a first element, or input
     * whose value converts to "0", yields an empty string.
     *
     * NOTE(review): base_convert() is documented to lose precision on large
     * numbers, so long bit strings may not convert exactly - confirm the
     * intended maximum input length.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (isset($bin[0])) {
            $hex = \base_convert($bin, 2, 16);
            if ($hex !== '0') {
                return \pack('H*', $hex);
            }
        }

        return '';
    }
378
379
    /**
     * Returns the three bytes of the UTF-8 Byte Order Mark.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
390
391
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
405
406
    /**
     * Picks one character out of a string by its zero-based index.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 is served by mbstring directly, other charsets by self::substr()
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
423
424
    /**
     * Breaks a string up into its characters by delegating to
     * UTF8::str_split() with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
435
436
    /**
     * Probes the environment for the extensions/features the helpers can use
     * and records each finding in self::$SUPPORT.
     *
     * Only the first call does any work; later calls return immediately.
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the marker entry turns every call after the first into a no-op
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }
        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // mbstring - http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // iconv - http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // intl - http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();
        self::$SUPPORT['intl__transliterator_list_ids'] = [];

        $canListTransliterators = self::$SUPPORT['intl'] === true
                                  && \function_exists('transliterator_list_ids') === true;
        if ($canListTransliterators) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // IntlChar - http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // ctype - http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // finfo - http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // json - http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // pcre - http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // with the symfony polyfill in place the "mb_" functions exist as well
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }
    }
495
496
    /**
     * Builds the character that belongs to a Unicode code point.
     *
     * Results are memoized per "code point + encoding" combination. Code
     * points up to 127 are read from the lookup table in self::$CHR; larger
     * ones are produced by IntlChar when available, otherwise the UTF-8 bytes
     * are assembled by hand from the same table. For any target encoding
     * other than UTF-8 the result is passed through self::encode().
     *
     * INFO: opposite to UTF8::ord()
     *
     * NOTE(review): self::$SUPPORT is read here without calling
     * checkForSupport() first - presumably it is populated elsewhere; confirm.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // per-process memo of already generated characters
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $needsMbstring = $encoding !== 'UTF-8'
                         && $encoding !== 'ISO-8859-1'
                         && $encoding !== 'WINDOWS-1252';
        if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 bytes manually
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            $code_point = (int) $code_point;

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            if ($code_point <= 0x7F) {
                // 1 byte
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                // 2 bytes: lead byte + one continuation byte
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // 3 bytes: lead byte + two continuation bytes
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // 4 bytes: lead byte + three continuation bytes
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // shared tail of all three strategies: convert if needed, then memoize
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
608
609
    /**
     * Runs a callback once for every character of a string and collects the
     * return values.
     *
     * The string is cut into characters with UTF8::str_split() first.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
624
625
    /**
     * Lists how many bytes each character of a Unicode string occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byteLength = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byteLength = '\strlen';
        }

        return \array_map($byteLength, self::str_split($str));
    }
655
656
    /**
     * Get a decimal code representation of a specific character.
     *
     * The first byte decides how many bytes are read (1 to 4); the payload
     * bits of the lead byte and of each following byte are then combined.
     * Only the first character of $char is evaluated. The input is not
     * validated: a truncated or malformed sequence gives an unspecified value.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int the code point, or 0 for an empty string
     */
    public static function chr_to_decimal(string $char): int
    {
        // reading $char[0] of an empty string would trigger an
        // "uninitialized string offset" error, so bail out early
        if ($char === '') {
            return 0;
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the bits of every following byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
694
695
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty string is returned as-is. The literal entity "&#0;" is
     * replaced by an empty string before it is handed to UTF8::ord().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $normalized = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($normalized), $pfix);
    }
715
716
    /**
     * Alias for "UTF8::chr_to_decimal()": forwards the argument unchanged.
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
729
730
    /**
     * Cuts a string into chunks and glues them back together with a separator.
     *
     * The chunks come from UTF8::str_split(); $end is placed between
     * neighbouring chunks, so nothing is appended after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
743
744
    /**
     * Strips every byte that is not part of a UTF-8 shaped sequence and then
     * applies the optional clean-up steps that were asked for.
     *
     * Step order: byte filter, diamond question mark, invisible characters,
     * whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences and is written back
        // unchanged; stray bytes match groups 2/3 and are replaced by the
        // (then empty) group 1, i.e. removed.
        $regx = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($regx, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
811
812
    /**
     * Repairs simple encoding mix-ups and then runs UTF8::clean() with a fixed
     * set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // leave MS Word chars untouched
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
846
847
    /**
     * Translates characters into their Unicode code points.
     *
     * A string argument is first cut into characters with UTF8::str_split();
     * an array argument is used element by element as given.
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        $points = \array_map([self::class, 'ord'], $chars);
        if (\count($points) === 0) {
            return [];
        }

        if ($u_style !== true) {
            return $points;
        }

        // turn each integer into its "U+xxxx" notation
        return \array_map([self::class, 'int_to_hex'], $points);
    }
891
892
    /**
     * Replaces every run of whitespace ("[[:space:]]+") with a single space and
     * trims the result.
     *
     * Uses "mb_ereg_replace()" when mbstring is available, otherwise UTF8::regex_replace().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        // the cast turns a failed replacement (non-string result) into an empty string
        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
910
911
    /**
     * Returns count of characters used in a string.
     *
     * The string is split into single characters via UTF8::str_split() and the
     * occurrences of each character are counted.
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Forwarded unchanged to UTF8::str_split(); set to false,
     *                                   if you don't want to use the mb-based implementation there
     *                                   (presumably "mb_substr" - TODO confirm against str_split()).</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        $chars = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($chars);
    }
935
936
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern below is replaced
     * with an empty string; everything else is returned unchanged.
     *
     * @param string $str <p>The CSS input.</p>
     *
     * @return string
     *                <p>The input without the matched media-query blocks
     *                (an empty string if "preg_replace()" fails and returns null).</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // flags: m = multiline, i = case-insensitive, s = dot matches newline, U = ungreedy
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
951
952
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
962
963
    /**
     * Converts a int-value into an UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * decoded via UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point (anything that can be concatenated into a string).</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numericEntity = '&#' . $int . ';';
        $flags = \ENT_QUOTES | \ENT_HTML5;

        return self::html_entity_decode($numericEntity, $flags);
    }
974
975
    /**
     * Decodes a MIME header field
     *
     * With iconv support "iconv_mime_decode()" is used (continuing on errors);
     * otherwise the input is passed to "mb_decode_mimeheader()".
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as they are, everything else gets normalized
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // fallback without iconv: for non UTF-8 charsets the input is run through
        // UTF8::encode() before it is handed to mbstring
        $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($input);
    }
1001
1002
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
     *        so you can call this function also on a UTF-8 String and you don't mess the string.
     *
     * Besides real charsets the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are
     * handled, both as target and as source.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the source encoding is always detected via
     *                                       UTF8::str_detect_encoding(); a successful detection overrides
     *                                       $fromEncoding, a failed one returns UTF8::to_utf8($str).<br>
     *                                       If false, detection only runs when $fromEncoding is empty.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is 'JSON' and the string can not be json-encoded
     *
     * @return string
     *                <p>The converted string, or the unchanged input if no conversion was possible / needed.</p>
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as they are, everything else gets normalized
        $passThroughEncodings = ['UTF-8', 'CP850'];

        if (\in_array($toEncoding, $passThroughEncodings, true) === false) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && \in_array($fromEncoding, $passThroughEncodings, true) === false) {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // nothing to do if source and target are known and equal
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // --- pseudo encoding: JSON
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // --- pseudo encoding: BASE64
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            // NOTE(review): in strict mode base64_decode() returns false for invalid input and
            // that value is passed on unchanged below - confirm that this is intended.
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // --- pseudo encoding: HTML-ENTITIES
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the source encoding if requested, or if it is still unknown
        $detectedEncoding = ($autodetectFromEncoding === true || !$fromEncoding)
            ? self::str_detect_encoding($str)
            : false;

        if ($detectedEncoding !== false) {
            $fromEncoding = $detectedEncoding;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // shortcuts that use the own converters of this class
        if ($toEncoding === 'UTF-8' && \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)) {
            return self::to_utf8($str);
        }

        if ($toEncoding === 'ISO-8859-1' && \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)) {
            return self::to_iso8859($str);
        }

        // all other targets need mbstring: warn, but still try iconv below
        if (
            \in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            // an empty / falsy result falls through to iconv
            if ($converted) {
                return $converted;
            }
        }

        $converted = \iconv($fromEncoding, $toEncoding, $str);

        // last resort: hand back the (unconverted) string
        return $converted !== false ? $converted : $str;
    }
1160
1161
    /**
     * Encodes a string as a MIME header value via "iconv_mime_encode()" (with an empty field name).
     *
     * @param string $str
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding (iconv "scheme").</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed (iconv "line-break-chars").</p>
     * @param int    $indent           [optional] <p>Set the max line length (iconv "line-length").</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as they are, everything else gets normalized
        $passThroughCharsets = ['UTF-8', 'CP850'];

        if (\in_array($fromCharset, $passThroughCharsets, true) === false) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (\in_array($toCharset, $passThroughCharsets, true) === false) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1201
1202
    /**
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
     *
     * The extract is cut at the next space / period and the skipped parts are marked
     * with $replacerForSkippedText.
     *
     * NOTE(review): the cut positions are computed with min() / max() over two position
     * look-ups that return false when the character is missing. min() of an int and false
     * looks like it yields a falsy value, so "no cut position" is also reported when only
     * one of both characters exists - existing results depend on that, confirm before changing.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // for UTF-8 the native "mb_*" functions are called directly,
        // every other encoding goes through the wrappers of this class
        $isUtf8 = $encoding === 'UTF-8';

        // characters that are trimmed from the borders of the extract
        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // number of characters of $text
        $charCount = static function ($text) use ($isUtf8, $encoding) {
            return $isUtf8 ? (int) \mb_strlen($text) : (int) self::strlen($text, $encoding);
        };

        // part of $str (false on failure)
        $slice = static function ($offset, $size) use ($str, $isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($str, $offset, $size)
                : self::substr($str, $offset, $size, $encoding);
        };

        // position of the next space / period at or after $offset, 0 if there is none
        $nextBreak = static function ($offset) use ($str, $isUtf8, $encoding) {
            if ($isUtf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        // --- no search-string: cut the text after (roughly) $length characters
        if ($search === '') {
            $end = $length > 0 ? \min($length - 1, $charCount($str)) : 0;

            $pos = $nextBreak($end);
            if (!$pos) {
                return $str;
            }

            $strSub = $slice(0, $pos);
            if ($strSub === false) {
                return '';
            }

            return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // --- with search-string: try to center the first (case-insensitive) match
        if ($isUtf8) {
            $wordPos = (int) \mb_stripos($str, $search);
        } else {
            $wordPos = (int) self::stripos($str, $search, 0, $encoding);
        }
        $halfSide = (int) ($wordPos - $length / 2 + $charCount($search) / 2);

        // start at the last space / period before the centered window
        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = $slice(0, $halfSide);
            if ($halfText !== false) {
                if ($isUtf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        $textLength = (int) self::strlen($str, $encoding);

        if ($wordPos && $halfSide > 0) {
            // the match is not at the beginning: the extract starts somewhere in the text
            $offset = \min($pos_start + $length - 1, $textLength);
            $pos_end = $nextBreak($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no cut position behind the window: take everything up to the end
                $strSub = $slice($pos_start, $charCount($str));

                return $strSub !== false
                    ? $replacerForSkippedText . \ltrim($strSub, $trimChars)
                    : '';
            }

            $strSub = $slice($pos_start, $pos_end);

            return $strSub !== false
                ? $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText
                : '';
        }

        // the match is (near) the beginning or was not found: the extract starts at offset 0
        $offset = \min($length - 1, $textLength);
        $pos_end = $nextBreak($offset);
        if (!$pos_end) {
            return $str;
        }

        $strSub = $slice(0, $pos_end);

        return $strSub !== false
            ? \rtrim($strSub, $trimChars) . $replacerForSkippedText
            : '';
    }
1389
1390
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. It is passed through
     *                                        "filter_var(..., FILTER_SANITIZE_STRING)" before it is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed through to "file_get_contents()"
     *                                        (search the file in the include path).
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null (and $timeout is not 0),
     *                                        a context with the "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout
     *                                        (only used if no $context is given).</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // init
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            // filter_var() reports a failed filter with false; with strict types that value
            // would raise a TypeError in file_get_contents(), so report the failure instead
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // only pass the length if it was really given
        if (\is_int($maxLength) === true) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        if (
            self::is_binary($data, true) === true
            &&
            self::is_utf16($data, false) === false
            &&
            self::is_utf32($data, false) === false
        ) {
            // it's binary and not UTF16 or UTF32: keep the data untouched
            return $data;
        }

        $data = self::encode('UTF-8', $data, false, $fromEncoding);

        return self::cleanup($data);
    }
1483
1484
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * The whole file is read and its content is checked via UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);
        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1503
1504
    /**
     * Normalizes strings to the given Unicode normalization form (default: NFC); if the
     * normalization yields no result, the string is converted via UTF8::encode('UTF-8', ...).
     *
     * Arrays and objects are processed recursively (element by element, in place);
     * all other non-string values are returned unchanged.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>One of the "\Normalizer" form constants.</p>
     * @param string $leading_combining  <p>Prefix that is added if the result starts with a combining
     *                                   mark (\p{Mn}); an empty string disables the prefix.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        if ($type === 'array' || $type === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // break the reference, so that later writes can't hit the last element
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // non-empty marker: "normalization was fine", checked via isset() below
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // an empty / failed normalization falls back to the encoding fixer
            $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
        }

        if (
            $var[0] >= "\x80"
            &&
            isset($normalized[0], $leading_combining[0])
            &&
            \preg_match('/^\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1568
1569
    /**
     * "filter_input()"-wrapper: the result of the native function is passed through UTF8::filter().
     *
     * Gets a specific external variable by name and optionally filters it
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              Only forwarded if the argument was really passed.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // forward $options only if the caller passed it explicitly
        $raw = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw);
    }
1611
1612
    /**
     * "filter_input_array()"-wrapper: the result of the native function is passed through UTF8::filter().
     *
     * Gets external variables and optionally filters them
     *
     * @see  http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are filter which specifies the
     *                          filter type,
     *                          flags which specifies any flags that apply to the
     *                          filter, and options which specifies any options that
     *                          apply to the filter.
     *                          </p>
     *                          <p>
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
     *                          input array are filtered by this filter.
     *                          </p>
     *                          <p>
     *                          Only forwarded (together with $add_empty) if the argument was really passed.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // without an explicit definition the native defaults are used
        $raw = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($raw);
    }
1657
1658
    /**
     * "filter_var()"-wrapper that normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter.
     *
     * @see  http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply. The
     *                        manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags. If the filter
     *                        accepts options, flags can be provided in the "flags" field of the array. For
     *                        the "callback" filter, a callable should be passed. The
     *                        callback must accept one argument, the value to be filtered, and return
     *                        the value after filtering/sanitizing it.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // for filters that accept options, use this format
     *                        $options = array(
     *                            'options' => array(
     *                                'default' => 3, // value to return if the filter fails
     *                                // other options here
     *                                'min_range' => 0
     *                            ),
     *                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *
     *                        // for filters that only accept flags, you can pass them directly
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *
     *                        // for filters that only accept flags, you can also pass them as an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                            array('flags' => FILTER_NULL_ON_FAILURE));
     *
     *                        // callback validate filter
     *                        function foo($value)
     *                        {
     *                            // Expected format: Surname, GivenNames
     *                            if (strpos($value, ", ") === false) return false;
     *                            list($surname, $givennames) = explode(", ", $value, 2);
     *                            $empty = (empty($surname) || empty($givennames));
     *                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                            if ($empty || $notstrings) {
     *                                return false;
     *                            } else {
     *                                return $value;
     *                            }
     *                        }
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Pass $options on only when the caller really provided it, so that the
        // native default is used instead of an explicit null.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1726
1727
    /**
     * "filter_var_array()"-wrapper that normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them.
     *
     * @see  http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a
     *                          filter type, or an array optionally specifying the filter, flags and options.
     *                          If the value is an array, the valid keys are "filter" (the filter type),
     *                          "flags" (any flags that apply to the filter) and "options" (any options
     *                          that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all
     *                          values in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set
     */
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // Forward the optional arguments only when the caller supplied at least
        // the definition; otherwise let the native function use its own defaults.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($filtered);
    }
1770
1771
    /**
     * Reports whether the "finfo" class exists in the current PHP runtime.
     *
     * @return bool
     *              <strong>true</strong> if the class is available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoClassName = 'finfo';

        return \class_exists($finfoClassName);
    }
1781
1782
    /**
     * Returns the first $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                An empty string if the input is empty or $n is not positive
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $n <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Name the charset explicitly: without it "mb_substr()" falls back to
            // "mb_internal_encoding()", which may not be UTF-8 on every setup.
            return (string) \mb_substr($str, 0, $n, 'UTF-8');
        }

        return (string) self::substr($str, 0, $n, $encoding);
    }
1803
1804
    /**
     * Check that the string does not contain more unicode characters than the given limit.
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the maximum number of chars the string may have
     *
     * @return bool true if the string length is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1816
1817
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // search / replace lists, built once per request from the "utf8_fix" data
        static $BROKEN_SEQUENCES = null;
        static $FIXED_SEQUENCES = null;

        if ($BROKEN_SEQUENCES === null) {
            // load the replacement table lazily
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $BROKEN_SEQUENCES = \array_keys(self::$BROKEN_UTF8_FIX);
            $FIXED_SEQUENCES = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($BROKEN_SEQUENCES, $FIXED_SEQUENCES, $str);
    }
1850
1851
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * Arrays are processed element by element (recursively); for strings the
     * decode / re-encode step is repeated until the value stops changing.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // keep decoding until another pass no longer changes the string
        while ($previous !== $current) {
            $previous = $current;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1893
1894
    /**
     * Get the text direction of a specific character.
     *
     * If "IntlChar" support is flagged as available, its direction code is used
     * first; a code that is in neither list below, or missing "IntlChar" support,
     * falls back to a hard-coded lookup of right-to-left code point ranges.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $directionCode = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltrCodes = [0, 11, 12, 20];
            $rtlCodes = [1, 13, 14, 15, 21];

            if (\in_array($directionCode, $ltrCodes, true)) {
                return 'LTR';
            }

            if (\in_array($directionCode, $rtlCodes, true)) {
                return 'RTL';
            }

            // any other code: continue with the manual lookup below
        }

        $cp = static::chr_to_decimal($char);

        // everything outside of 0x5be - 0x10b7f is treated as left-to-right
        if (!($cp >= 0x5be && $cp <= 0x10b7f)) {
            return 'LTR';
        }

        if ($cp <= 0x85e) {
            $isRtl = $cp === 0x5be
                     || $cp === 0x5c0
                     || $cp === 0x5c3
                     || $cp === 0x5c6
                     || ($cp >= 0x5d0 && $cp <= 0x5ea)
                     || ($cp >= 0x5f0 && $cp <= 0x5f4)
                     || $cp === 0x608
                     || $cp === 0x60b
                     || $cp === 0x60d
                     || $cp === 0x61b
                     || ($cp >= 0x61e && $cp <= 0x64a)
                     || ($cp >= 0x66d && $cp <= 0x66f)
                     || ($cp >= 0x671 && $cp <= 0x6d5)
                     || ($cp >= 0x6e5 && $cp <= 0x6e6)
                     || ($cp >= 0x6ee && $cp <= 0x6ef)
                     || ($cp >= 0x6fa && $cp <= 0x70d)
                     || $cp === 0x710
                     || ($cp >= 0x712 && $cp <= 0x72f)
                     || ($cp >= 0x74d && $cp <= 0x7a5)
                     || $cp === 0x7b1
                     || ($cp >= 0x7c0 && $cp <= 0x7ea)
                     || ($cp >= 0x7f4 && $cp <= 0x7f5)
                     || $cp === 0x7fa
                     || ($cp >= 0x800 && $cp <= 0x815)
                     || $cp === 0x81a
                     || $cp === 0x824
                     || $cp === 0x828
                     || ($cp >= 0x830 && $cp <= 0x83e)
                     || ($cp >= 0x840 && $cp <= 0x858)
                     || $cp === 0x85e;

            return $isRtl ? 'RTL' : 'LTR';
        }

        if ($cp === 0x200f) {
            return 'RTL';
        }

        if ($cp >= 0xfb1d) {
            $isRtl = $cp === 0xfb1d
                     || ($cp >= 0xfb1f && $cp <= 0xfb28)
                     || ($cp >= 0xfb2a && $cp <= 0xfb36)
                     || ($cp >= 0xfb38 && $cp <= 0xfb3c)
                     || $cp === 0xfb3e
                     || ($cp >= 0xfb40 && $cp <= 0xfb41)
                     || ($cp >= 0xfb43 && $cp <= 0xfb44)
                     || ($cp >= 0xfb46 && $cp <= 0xfbc1)
                     || ($cp >= 0xfbd3 && $cp <= 0xfd3d)
                     || ($cp >= 0xfd50 && $cp <= 0xfd8f)
                     || ($cp >= 0xfd92 && $cp <= 0xfdc7)
                     || ($cp >= 0xfdf0 && $cp <= 0xfdfc)
                     || ($cp >= 0xfe70 && $cp <= 0xfe74)
                     || ($cp >= 0xfe76 && $cp <= 0xfefc)
                     || ($cp >= 0x10800 && $cp <= 0x10805)
                     || $cp === 0x10808
                     || ($cp >= 0x1080a && $cp <= 0x10835)
                     || ($cp >= 0x10837 && $cp <= 0x10838)
                     || $cp === 0x1083c
                     || ($cp >= 0x1083f && $cp <= 0x10855)
                     || ($cp >= 0x10857 && $cp <= 0x1085f)
                     || ($cp >= 0x10900 && $cp <= 0x1091b)
                     || ($cp >= 0x10920 && $cp <= 0x10939)
                     || $cp === 0x1093f
                     || $cp === 0x10a00
                     || ($cp >= 0x10a10 && $cp <= 0x10a13)
                     || ($cp >= 0x10a15 && $cp <= 0x10a17)
                     || ($cp >= 0x10a19 && $cp <= 0x10a33)
                     || ($cp >= 0x10a40 && $cp <= 0x10a47)
                     || ($cp >= 0x10a50 && $cp <= 0x10a58)
                     || ($cp >= 0x10a60 && $cp <= 0x10a7f)
                     || ($cp >= 0x10b00 && $cp <= 0x10b35)
                     || ($cp >= 0x10b40 && $cp <= 0x10b55)
                     || ($cp >= 0x10b58 && $cp <= 0x10b72)
                     || ($cp >= 0x10b78 && $cp <= 0x10b7f);

            if ($isRtl) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2007
2008
    /**
     * Check for php-support.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        // no key: hand out the complete support table
        if ($key === null) {
            return self::$SUPPORT;
        }

        // unknown (or null-valued) keys yield null
        return self::$SUPPORT[$key] ?? null;
    }
2030
2031
    /**
     * Detect the file type from the first two bytes (the "magic number") of the given data.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      the (binary) file content, at least its first two bytes
     * @param array  $fallback with this keys: 'ext', 'mime', 'type';
     *                         returned as-is when the input is shorter than two bytes or the signature is unknown
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // a signature needs at least two bytes
        if (!isset($str[1])) {
            return $fallback;
        }

        // first two bytes => [extension, mime type]
        //
        // The raw bytes are compared directly: concatenating their decimal values
        // (e.g. 71 . 73 = "7173") is ambiguous, because 0x07 0xAD gives "7173" too.
        $signatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        $magic = $str[0] . $str[1];
        if (!isset($signatures[$magic])) {
            return $fallback;
        }

        return [
            'ext'  => $signatures[$magic][0],
            'mime' => $signatures[$magic][1],
            'type' => 'binary',
        ];
    }
2129
2130
    /**
     * Build a random string by picking $length characters out of $possibleChars.
     *
     * Indexes come from "random_int()"; if that throws, "mt_rand()" is used instead.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                An empty string if $possibleChars is empty or $length is not positive
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // shared by both branches: random index between 0 and $max (inclusive)
        $randomIndex = static function (int $max): int {
            try {
                return \random_int(0, $max);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                return \mt_rand(0, $max);
            }
        };

        // init
        $i = 0;
        $str = '';

        if ($encoding === 'UTF-8') {
            // Name the charset explicitly: without it the "mb_*" functions fall back
            // to "mb_internal_encoding()", which may not be UTF-8 on every setup.
            $maxlength = (int) \mb_strlen($possibleChars, 'UTF-8');
            if ($maxlength === 0) {
                return '';
            }

            while ($i < $length) {
                $char = \mb_substr($possibleChars, $randomIndex($maxlength - 1), 1, 'UTF-8');
                if ($char !== false) {
                    $str .= $char;
                    ++$i;
                }
            }

            return $str;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $maxlength = (int) self::strlen($possibleChars, $encoding);
        if ($maxlength === 0) {
            return '';
        }

        while ($i < $length) {
            $char = self::substr($possibleChars, $randomIndex($maxlength - 1), 1, $encoding);
            // a failed extraction is skipped and retried with a new index
            if ($char !== false) {
                $str .= $char;
                ++$i;
            }
        }

        return $str;
    }
2191
2192
    /**
     * Create a unique identifier from a random number, the session id, the
     * remote / server address (when present in $_SERVER) and optional extra entropy.
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // collect all entropy sources into one prefix string
        $seed = \implode(
            '',
            [
                \random_int(0, \mt_getrandmax()),
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $entropyExtra,
            ]
        );

        $unique = \uniqid($seed, true);

        return $md5 ? \md5($unique . $seed) : $unique;
    }
2214
2215
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @see        UTF8::string_has_bom()
     *
     * @param string $str
     *
     * @return bool
     *              the result of "UTF8::string_has_bom()" for the given string
     *
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2230
2231
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * Uses "mb_ereg_match()" when mbstring support is flagged as available,
     * "UTF8::str_matches_pattern()" otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2247
2248
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * Uses "mb_ereg_match()" when mbstring support is flagged as available,
     * "UTF8::str_matches_pattern()" otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2264
2265
    /**
     * Converts a hexadecimal-value into an UTF-8 character.
     *
     * The value is parsed with "hexdec()" and handed to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2276
2277
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted (case-insensitive) forms are 4 to 6 hex digits, optionally
     * prefixed with "\u" or "U+", e.g. "00f1", "\u00f1" or "U+00f1".
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hex digits are allowed: with "[a-z0-9]" an input like "wxyz"
        // matched and was then silently converted to 0 by "intval()".
        if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2301
2302
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * All arguments are passed through unchanged.
     *
     * @see UTF8::html_entity_decode()
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2317
2318
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with $keepAsciiChars only code points from 0x80 upwards are converted
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // The conversion map is a list of [start, end, offset, mask] groups, so its
            // size must be a multiple of four. The former fifth element was ignored by
            // PHP 7 and is rejected by PHP 8 - NOTE(review): confirm against the manual.
            //
            // The charset is always passed, so the result does not depend on
            // "mb_internal_encoding()".
            return \mb_encode_numericentity(
                $str,
                [$startCode, 0xfffff, 0, 0xfffff],
                $encoding
            );
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2374
2375
    /**
2376
     * UTF-8 version of html_entity_decode()
2377
     *
2378
     * The reason we are not using html_entity_decode() by itself is because
2379
     * while it is not technically correct to leave out the semicolon
2380
     * at the end of an entity most browsers will still interpret the entity
2381
     * correctly. html_entity_decode() does not convert entities without
2382
     * semicolons, so we are left with our own little solution here. Bummer.
2383
     *
2384
     * Convert all HTML entities to their applicable characters
2385
     *
2386
     * INFO: opposite to UTF8::html_encode()
2387
     *
2388
     * @see http://php.net/manual/en/function.html-entity-decode.php
2389
     *
2390
     * @param string $str      <p>
2391
     *                         The input string.
2392
     *                         </p>
2393
     * @param int    $flags    [optional] <p>
2394
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2395
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2396
     *                         <table>
2397
     *                         Available <i>flags</i> constants
2398
     *                         <tr valign="top">
2399
     *                         <td>Constant Name</td>
2400
     *                         <td>Description</td>
2401
     *                         </tr>
2402
     *                         <tr valign="top">
2403
     *                         <td><b>ENT_COMPAT</b></td>
2404
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2405
     *                         </tr>
2406
     *                         <tr valign="top">
2407
     *                         <td><b>ENT_QUOTES</b></td>
2408
     *                         <td>Will convert both double and single quotes.</td>
2409
     *                         </tr>
2410
     *                         <tr valign="top">
2411
     *                         <td><b>ENT_NOQUOTES</b></td>
2412
     *                         <td>Will leave both double and single quotes unconverted.</td>
2413
     *                         </tr>
2414
     *                         <tr valign="top">
2415
     *                         <td><b>ENT_HTML401</b></td>
2416
     *                         <td>
2417
     *                         Handle code as HTML 4.01.
2418
     *                         </td>
2419
     *                         </tr>
2420
     *                         <tr valign="top">
2421
     *                         <td><b>ENT_XML1</b></td>
2422
     *                         <td>
2423
     *                         Handle code as XML 1.
2424
     *                         </td>
2425
     *                         </tr>
2426
     *                         <tr valign="top">
2427
     *                         <td><b>ENT_XHTML</b></td>
2428
     *                         <td>
2429
     *                         Handle code as XHTML.
2430
     *                         </td>
2431
     *                         </tr>
2432
     *                         <tr valign="top">
2433
     *                         <td><b>ENT_HTML5</b></td>
2434
     *                         <td>
2435
     *                         Handle code as HTML 5.
2436
     *                         </td>
2437
     *                         </tr>
2438
     *                         </table>
2439
     *                         </p>
2440
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2441
     *
2442
     * @return string the decoded string
2443
     */
2444 40
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Fast exit: inputs shorter than four bytes, or without any "&",
        // are returned untouched.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // When the caller passes no flags, decode both quote styles using HTML5 rules.
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Conversion map for mb_decode_numericentity(): one group of exactly
        // four values (start code, end code, offset, mask). A map whose size is
        // not a multiple of four is rejected by PHP 8, so no trailing element here.
        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $numericEntityMap = [0x80, 0xfffff, 0, 0xfffff];

        // Repeat until a pass changes nothing, so entities that only become
        // visible after a previous decoding step are decoded as well.
        do {
            $str_compare = $str;

            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity($str, $numericEntityMap);
                } else {
                    $str = \mb_decode_numericentity($str, $numericEntityMap, $encoding);
                }
            } else {
                // NOTE(review): this branch still calls mb_convert_encoding();
                // presumably a polyfill provides it when ext-mbstring is missing - confirm.
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // Quote characters stay encoded here; whether they get
                        // decoded is left to html_entity_decode() and $flags below.
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" to numeric entities that lack it
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2531
2532
    /**
2533
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2534
     *
2535
     * @param string $str
2536
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2537
     *
2538
     * @return string
2539
     */
2540 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Convert both quote styles and replace invalid code unit sequences
        // instead of returning an empty string.
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2548
2549
    /**
2550
     * Remove empty html-tag.
2551
     *
2552
     * e.g.: <tag></tag>
2553
     *
2554
     * @param string $str
2555
     *
2556
     * @return string
2557
     */
2558 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // Matches an opening tag, followed by nothing but optional whitespace,
        // followed by a closing tag.
        $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

        $withoutEmptyTags = \preg_replace($emptyTagPattern, '', $str);

        // preg_replace() yields null on failure; the cast turns that into ''.
        return (string) $withoutEmptyTags;
    }
2566
2567
    /**
2568
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2569
     *
2570
     * @see http://php.net/manual/en/function.htmlentities.php
2571
     *
2572
     * @param string $str           <p>
2573
     *                              The input string.
2574
     *                              </p>
2575
     * @param int    $flags         [optional] <p>
2576
     *                              A bitmask of one or more of the following flags, which specify how to handle
2577
     *                              quotes, invalid code unit sequences and the used document type. The default is
2578
     *                              ENT_COMPAT | ENT_HTML401.
2579
     *                              <table>
2580
     *                              Available <i>flags</i> constants
2581
     *                              <tr valign="top">
2582
     *                              <td>Constant Name</td>
2583
     *                              <td>Description</td>
2584
     *                              </tr>
2585
     *                              <tr valign="top">
2586
     *                              <td><b>ENT_COMPAT</b></td>
2587
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2588
     *                              </tr>
2589
     *                              <tr valign="top">
2590
     *                              <td><b>ENT_QUOTES</b></td>
2591
     *                              <td>Will convert both double and single quotes.</td>
2592
     *                              </tr>
2593
     *                              <tr valign="top">
2594
     *                              <td><b>ENT_NOQUOTES</b></td>
2595
     *                              <td>Will leave both double and single quotes unconverted.</td>
2596
     *                              </tr>
2597
     *                              <tr valign="top">
2598
     *                              <td><b>ENT_IGNORE</b></td>
2599
     *                              <td>
2600
     *                              Silently discard invalid code unit sequences instead of returning
2601
     *                              an empty string. Using this flag is discouraged as it
2602
     *                              may have security implications.
2603
     *                              </td>
2604
     *                              </tr>
2605
     *                              <tr valign="top">
2606
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2607
     *                              <td>
2608
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2609
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2610
     *                              string.
2611
     *                              </td>
2612
     *                              </tr>
2613
     *                              <tr valign="top">
2614
     *                              <td><b>ENT_DISALLOWED</b></td>
2615
     *                              <td>
2616
     *                              Replace invalid code points for the given document type with a
2617
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2618
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2619
     *                              instance, to ensure the well-formedness of XML documents with
2620
     *                              embedded external content.
2621
     *                              </td>
2622
     *                              </tr>
2623
     *                              <tr valign="top">
2624
     *                              <td><b>ENT_HTML401</b></td>
2625
     *                              <td>
2626
     *                              Handle code as HTML 4.01.
2627
     *                              </td>
2628
     *                              </tr>
2629
     *                              <tr valign="top">
2630
     *                              <td><b>ENT_XML1</b></td>
2631
     *                              <td>
2632
     *                              Handle code as XML 1.
2633
     *                              </td>
2634
     *                              </tr>
2635
     *                              <tr valign="top">
2636
     *                              <td><b>ENT_XHTML</b></td>
2637
     *                              <td>
2638
     *                              Handle code as XHTML.
2639
     *                              </td>
2640
     *                              </tr>
2641
     *                              <tr valign="top">
2642
     *                              <td><b>ENT_HTML5</b></td>
2643
     *                              <td>
2644
     *                              Handle code as HTML 5.
2645
     *                              </td>
2646
     *                              </tr>
2647
     *                              </table>
2648
     *                              </p>
2649
     * @param string $encoding      [optional] <p>
2650
     *                              Like <b>htmlspecialchars</b>,
2651
     *                              <b>htmlentities</b> takes an optional third argument
2652
     *                              <i>encoding</i> which defines encoding used in
2653
     *                              conversion.
2654
     *                              Although this argument is technically optional, you are highly
2655
     *                              encouraged to specify the correct value for your code.
2656
     *                              </p>
2657
     * @param bool   $double_encode [optional] <p>
2658
     *                              When <i>double_encode</i> is turned off PHP will not
2659
     *                              encode existing html entities. The default is to convert everything.
2660
     *                              </p>
2661
     *
2662
     * @return string
2663
     *                <p>
2664
     *                The encoded string.
2665
     *                <br><br>
2666
     *                If the input <i>string</i> contains an invalid code unit
2667
     *                sequence within the given <i>encoding</i> an empty string
2668
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2669
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2670
     *                </p>
2671
     */
2672 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // The two most common encoding names skip normalization.
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * PHP's htmlentities() leaves backslashes alone, so every backslash is
         * replaced by its numeric html entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
2696
2697
    /**
2698
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2699
     *
2700
     * INFO: Take a look at "UTF8::htmlentities()"
2701
     *
2702
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2703
     *
2704
     * @param string $str           <p>
2705
     *                              The string being converted.
2706
     *                              </p>
2707
     * @param int    $flags         [optional] <p>
2708
     *                              A bitmask of one or more of the following flags, which specify how to handle
2709
     *                              quotes, invalid code unit sequences and the used document type. The default is
2710
     *                              ENT_COMPAT | ENT_HTML401.
2711
     *                              <table>
2712
     *                              Available <i>flags</i> constants
2713
     *                              <tr valign="top">
2714
     *                              <td>Constant Name</td>
2715
     *                              <td>Description</td>
2716
     *                              </tr>
2717
     *                              <tr valign="top">
2718
     *                              <td><b>ENT_COMPAT</b></td>
2719
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2720
     *                              </tr>
2721
     *                              <tr valign="top">
2722
     *                              <td><b>ENT_QUOTES</b></td>
2723
     *                              <td>Will convert both double and single quotes.</td>
2724
     *                              </tr>
2725
     *                              <tr valign="top">
2726
     *                              <td><b>ENT_NOQUOTES</b></td>
2727
     *                              <td>Will leave both double and single quotes unconverted.</td>
2728
     *                              </tr>
2729
     *                              <tr valign="top">
2730
     *                              <td><b>ENT_IGNORE</b></td>
2731
     *                              <td>
2732
     *                              Silently discard invalid code unit sequences instead of returning
2733
     *                              an empty string. Using this flag is discouraged as it
2734
     *                              may have security implications.
2735
     *                              </td>
2736
     *                              </tr>
2737
     *                              <tr valign="top">
2738
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2739
     *                              <td>
2740
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2741
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2742
     *                              string.
2743
     *                              </td>
2744
     *                              </tr>
2745
     *                              <tr valign="top">
2746
     *                              <td><b>ENT_DISALLOWED</b></td>
2747
     *                              <td>
2748
     *                              Replace invalid code points for the given document type with a
2749
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2750
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2751
     *                              instance, to ensure the well-formedness of XML documents with
2752
     *                              embedded external content.
2753
     *                              </td>
2754
     *                              </tr>
2755
     *                              <tr valign="top">
2756
     *                              <td><b>ENT_HTML401</b></td>
2757
     *                              <td>
2758
     *                              Handle code as HTML 4.01.
2759
     *                              </td>
2760
     *                              </tr>
2761
     *                              <tr valign="top">
2762
     *                              <td><b>ENT_XML1</b></td>
2763
     *                              <td>
2764
     *                              Handle code as XML 1.
2765
     *                              </td>
2766
     *                              </tr>
2767
     *                              <tr valign="top">
2768
     *                              <td><b>ENT_XHTML</b></td>
2769
     *                              <td>
2770
     *                              Handle code as XHTML.
2771
     *                              </td>
2772
     *                              </tr>
2773
     *                              <tr valign="top">
2774
     *                              <td><b>ENT_HTML5</b></td>
2775
     *                              <td>
2776
     *                              Handle code as HTML 5.
2777
     *                              </td>
2778
     *                              </tr>
2779
     *                              </table>
2780
     *                              </p>
2781
     * @param string $encoding      [optional] <p>
2782
     *                              Defines encoding used in conversion.
2783
     *                              </p>
2784
     *                              <p>
2785
     *                              For the purposes of this function, the encodings
2786
     *                              ISO-8859-1, ISO-8859-15,
2787
     *                              UTF-8, cp866,
2788
     *                              cp1251, cp1252, and
2789
     *                              KOI8-R are effectively equivalent, provided the
2790
     *                              <i>string</i> itself is valid for the encoding, as
2791
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2792
     *                              the same positions in all of these encodings.
2793
     *                              </p>
2794
     * @param bool   $double_encode [optional] <p>
2795
     *                              When <i>double_encode</i> is turned off PHP will not
2796
     *                              encode existing html entities, the default is to convert everything.
2797
     *                              </p>
2798
     *
2799
     * @return string the converted string.
2800
     *                </p>
2801
     *                <p>
2802
     *                If the input <i>string</i> contains an invalid code unit
2803
     *                sequence within the given <i>encoding</i> an empty string
2804
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2805
     *                <b>ENT_SUBSTITUTE</b> flags are set
2806
     */
2807 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // The two most common encoding names skip normalization.
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2819
2820
    /**
2821
     * Checks whether iconv is available on the server.
2822
     *
2823
     * @return bool
2824
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2825
     */
2826
    public static function iconv_loaded(): bool
    {
        // Ask the runtime whether the "iconv" extension is loaded.
        $hasIconv = \extension_loaded('iconv');

        return $hasIconv;
    }
2830
2831
    /**
2832
     * alias for "UTF8::decimal_to_chr()"
2833
     *
2834
     * @see UTF8::decimal_to_chr()
2835
     *
2836
     * @param mixed $int
2837
     *
2838
     * @return string
2839
     */
2840 4
    public static function int_to_chr($int): string
    {
        // Pure alias: the conversion itself is done by decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2844
2845
    /**
2846
     * Converts Integer to hexadecimal U+xxxx code point representation.
2847
     *
2848
     * INFO: opposite to UTF8::hex_to_int()
2849
     *
2850
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2851
     * @param string $pfix [optional]
2852
     *
2853
     * @return string the prefixed hexadecimal code point, zero-padded to at least four digits
2854
     */
2855 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Left-pad with zeros to at least four hex digits; longer values are kept as-is.
        $paddedHex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $paddedHex;
    }
2863
2864
    /**
2865
     * Checks whether intl-char is available on the server.
2866
     *
2867
     * @return bool
2868
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2869
     */
2870
    public static function intlChar_loaded(): bool
    {
        // Availability is detected through the presence of the "IntlChar" class.
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2874
2875
    /**
2876
     * Checks whether intl is available on the server.
2877
     *
2878
     * @return bool
2879
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2880
     */
2881 5
    public static function intl_loaded(): bool
    {
        // Ask the runtime whether the "intl" extension is loaded.
        $hasIntl = \extension_loaded('intl');

        return $hasIntl;
    }
2885
2886
    /**
2887
     * alias for "UTF8::is_ascii()"
2888
     *
2889
     * @see        UTF8::is_ascii()
2890
     *
2891
     * @param string $str
2892
     *
2893
     * @return bool
2894
     *
2895
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2896
     */
2897 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: forwards to is_ascii().
        $result = self::is_ascii($str);

        return $result;
    }
2901
2902
    /**
2903
     * alias for "UTF8::is_base64()"
2904
     *
2905
     * @see        UTF8::is_base64()
2906
     *
2907
     * @param string $str
2908
     *
2909
     * @return bool
2910
     *
2911
     * @deprecated <p>use "UTF8::is_base64()"</p>
2912
     */
2913 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: forwards to is_base64().
        $result = self::is_base64($str);

        return $result;
    }
2917
2918
    /**
2919
     * alias for "UTF8::is_binary()"
2920
     *
2921
     * @see        UTF8::is_binary()
2922
     *
2923
     * @param mixed $str
2924
     * @param bool  $strict
2925
     *
2926
     * @return bool
2927
     *
2928
     * @deprecated <p>use "UTF8::is_binary()"</p>
2929
     */
2930 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated alias: forwards both arguments to is_binary().
        $result = self::is_binary($str, $strict);

        return $result;
    }
2934
2935
    /**
2936
     * alias for "UTF8::is_bom()"
2937
     *
2938
     * @see        UTF8::is_bom()
2939
     *
2940
     * @param string $utf8_chr
2941
     *
2942
     * @return bool
2943
     *
2944
     * @deprecated <p>use "UTF8::is_bom()"</p>
2945
     */
2946 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: forwards to is_bom().
        $result = self::is_bom($utf8_chr);

        return $result;
    }
2950
2951
    /**
2952
     * alias for "UTF8::is_html()"
2953
     *
2954
     * @see        UTF8::is_html()
2955
     *
2956
     * @param string $str
2957
     *
2958
     * @return bool
2959
     *
2960
     * @deprecated <p>use "UTF8::is_html()"</p>
2961
     */
2962 2
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: forwards to is_html().
        $result = self::is_html($str);

        return $result;
    }
2966
2967
    /**
2968
     * alias for "UTF8::is_json()"
2969
     *
2970
     * @see        UTF8::is_json()
2971
     *
2972
     * @param string $str
2973
     *
2974
     * @return bool
2975
     *
2976
     * @deprecated <p>use "UTF8::is_json()"</p>
2977
     */
2978
    public static function isJson(string $str): bool
    {
        // Deprecated alias: forwards to is_json().
        $result = self::is_json($str);

        return $result;
    }
2982
2983
    /**
2984
     * alias for "UTF8::is_utf16()"
2985
     *
2986
     * @see        UTF8::is_utf16()
2987
     *
2988
     * @param mixed $str
2989
     *
2990
     * @return false|int
2991
     *                   <strong>false</strong> if it's not UTF-16,<br>
2992
     *                   <strong>1</strong> for UTF-16LE,<br>
2993
     *                   <strong>2</strong> for UTF-16BE
2994
     *
2995
     * @deprecated <p>use "UTF8::is_utf16()"</p>
2996
     */
2997 2
    public static function isUtf16($str)
    {
        // Deprecated alias: forwards to is_utf16() and passes its result through unchanged.
        $result = self::is_utf16($str);

        return $result;
    }
3001
3002
    /**
3003
     * alias for "UTF8::is_utf32()"
3004
     *
3005
     * @see        UTF8::is_utf32()
3006
     *
3007
     * @param mixed $str
3008
     *
3009
     * @return false|int
3010
     *                   <strong>false</strong> if it's not UTF-32,
3011
     *                   <strong>1</strong> for UTF-32LE,
3012
     *                   <strong>2</strong> for UTF-32BE
3013
     *
3014
     * @deprecated <p>use "UTF8::is_utf32()"</p>
3015
     */
3016 2
    public static function isUtf32($str)
    {
        // Deprecated alias: forwards to is_utf32() and passes its result through unchanged.
        $result = self::is_utf32($str);

        return $result;
    }
3020
3021
    /**
3022
     * alias for "UTF8::is_utf8()"
3023
     *
3024
     * @see        UTF8::is_utf8()
3025
     *
3026
     * @param string $str
3027
     * @param bool   $strict
3028
     *
3029
     * @return bool
3030
     *
3031
     * @deprecated <p>use "UTF8::is_utf8()"</p>
3032
     */
3033 17
    public static function isUtf8($str, $strict = false): bool
    {
        // Deprecated alias: forwards both arguments to is_utf8().
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3037
3038
    /**
3039
     * Returns true if the string contains only alphabetic chars, false otherwise.
3040
     *
3041
     * @param string $str
3042
     *
3043
     * @return bool
3044
     *              Whether or not $str contains only alphabetic chars
3045
     */
3046 10
    public static function is_alpha(string $str): bool
    {
        // Same pattern for both code paths: alphabetic chars only, from start to end.
        $alphaOnlyPattern = '^[[:alpha:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $alphaOnlyPattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($alphaOnlyPattern, $str);
    }
3055
3056
    /**
3057
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3058
     *
3059
     * @param string $str
3060
     *
3061
     * @return bool
3062
     *              Whether or not $str contains only alphanumeric chars
3063
     */
3064 13
    public static function is_alphanumeric(string $str): bool
    {
        // Same pattern for both code paths: alphanumeric chars only, from start to end.
        $alnumOnlyPattern = '^[[:alnum:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $alnumOnlyPattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($alnumOnlyPattern, $str);
    }
3073
3074
    /**
3075
     * Checks if a string is 7 bit ASCII.
3076
     *
3077
     * @param string $str <p>The string to check.</p>
3078
     *
3079
     * @return bool
3080
     *              <strong>true</strong> if it is ASCII<br>
3081
     *              <strong>false</strong> otherwise
3082
     */
3083 137
    public static function is_ascii(string $str): bool
    {
        // An empty string counts as ASCII; otherwise the string must not contain
        // any byte outside the allowed set (some control bytes plus 0x20-0x7E).
        // NOTE(review): the set lists \x10 and \x13 - possibly meant as decimal
        // 10/13 (already covered by \x0A/\x0D); confirm before changing.
        return $str === ''
            || !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
3091
3092
    /**
3093
     * Returns true if the string is base64 encoded, false otherwise.
3094
     *
3095
     * @param mixed|string $str                <p>The input string.</p>
3096
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
3097
     *
3098
     * @return bool whether or not $str is base64 encoded
3099
     */
3100 16
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        // The empty string is only accepted when the caller explicitly allows it.
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: re-encoding must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3117
3118
    /**
3119
     * Check if the input is binary... (this looks like a hack).
3120
     *
3121
     * @param mixed $input
3122
     * @param bool  $strict
3123
     *
3124
     * @return bool
3125
     */
3126 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string consisting solely of the characters "0" and "1" is treated as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        // Let the file-type detection have a say.
        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // More than a quarter of NUL bytes is treated as binary.
        $byteCount = \strlen($input);
        $nullByteCount = \substr_count($input, "\x0", 0, $byteCount);
        if (($nullByteCount / $byteCount) > 0.25) {
            return true;
        }

        // The remaining check only runs in strict mode.
        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mimeEncoding === 'binary';
    }
3162
3163
    /**
3164
     * Check if the file is binary.
3165
     *
3166
     * @param string $file
3167
     *
3168
     * @return bool
3169
     */
3170 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // A file that could not be opened is reported as "not binary".
        if (!\is_resource($handle)) {
            return false;
        }

        // Only the first 512 bytes are inspected.
        $firstBytes = \fread($handle, 512);
        \fclose($handle);

        if ($firstBytes === '') {
            return false;
        }

        return self::is_binary($firstBytes, true);
    }
3187
3188
    /**
     * Checks whether the string consists of whitespace characters only.
     *
     * The pattern allows zero characters, so an empty string also counts as blank.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains nothing but whitespace, <strong>false</strong> otherwise
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // without "mbstring" we have to use the generic pattern helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3205
3206
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is exactly one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // The BOM strings are the keys of the lookup table; the values are not needed here,
        // so the static table is no longer iterated by reference. The comparison stays strict.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3227
3228
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it evaluates to FALSE in a boolean context,
     * e.g.: "", "0", 0, 0.0, null, false or an empty array.
     *
     * @param mixed $str
     *
     * @return bool
     *              <strong>true</strong> if $str is empty, <strong>false</strong> otherwise
     */
    public static function is_empty($str): bool
    {
        // for an existing variable "empty($var)" is the same as "!$var"
        return !$str;
    }
3242
3243
    /**
     * Checks whether the string consists of hexadecimal characters only.
     *
     * The pattern allows zero characters, so an empty string also matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains nothing but hexadecimal chars, <strong>false</strong> otherwise
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // without "mbstring" we have to use the generic pattern helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3260
3261
    /**
     * Check if the string contains at least one html-tag, e.g.: "<b>", "</p>" or "<br />".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if an opening, closing or self-closing tag was found,
     *              <strong>false</strong> otherwise (also for an empty string)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // tag-name, followed by optional attributes (quoted or unquoted values), optionally self-closing
        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

        // "preg_match()" returns 1 only on a match (0 === no match, false === error)
        return \preg_match($tagPattern, $str) === 1;
    }
3281
3282
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via "UTF8::json_decode()" and is accepted only if the
     * decoding did not report an error.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if "ext-json" is not installed
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a decoded "null" is only acceptable if the input itself was the literal "null"
        $isLiteralNull = \strtoupper($str) === 'NULL';
        if ($decoded === null && $isLiteralNull === false) {
            return false;
        }

        if ($onlyArrayOrObjectResultsAreValid === true) {
            $isContainer = \is_object($decoded) || \is_array($decoded);
            if ($isContainer === false) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3318
3319
    /**
     * Checks whether the string consists of lower case characters only.
     *
     * The pattern allows zero characters, so an empty string also matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains nothing but lower case chars, <strong>false</strong> otherwise
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // without "mbstring" we have to use the generic pattern helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3333
3334
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * INFO: The string is really passed to "unserialize()", but no objects are
     *       instantiated: serialized objects are restored as "__PHP_Incomplete_Class",
     *       so no class code (e.g. "__wakeup()" / "__destruct()") of untrusted input can run.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "unserialize()" returns false for a serialized "false" and for invalid input,
        // so the serialized "false" needs its own check.
        if ($str === 'b:0;') {
            return true;
        }

        // The silence operator is intended: "unserialize()" raises a notice / warning for invalid input.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3353
3354
    /**
     * Checks whether the string consists of upper case characters only.
     *
     * The pattern allows zero characters, so an empty string also matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains nothing but upper case chars, <strong>false</strong> otherwise
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // without "mbstring" we have to use the generic pattern helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3372
3373
    /**
     * Check if the string is UTF-16.
     *
     * The input (without BOM) is converted "UTF-16LE|BE -> UTF-8 -> UTF-16LE|BE -> UTF-8". If this
     * round trip is stable, the chars of the converted string that also show up in the
     * char statistic of the input are counted. The byte order with more hits wins;
     * an equal count (also "0 === 0") means "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>Return false directly if "UTF8::is_binary()"
     *                                     (strict mode) does not classify the input as binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per byte order (LE is checked first, as before)
        $hits = [];
        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $hits[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the char statistic of the input is only built once, and only if it is needed
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys (the chars) are of interest. The result of a method call cannot
            // be used as a reference target, so we iterate by value.
            foreach (self::count_chars($test3) as $test3char => $ignoredCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        if ($hits['UTF-16BE'] !== $hits['UTF-16LE']) {
            if ($hits['UTF-16LE'] > $hits['UTF-16BE']) {
                return 1;
            }

            return 2;
        }

        return false;
    }
3450
3451
    /**
     * Check if the string is UTF-32.
     *
     * The input (without BOM) is converted "UTF-32LE|BE -> UTF-8 -> UTF-32LE|BE -> UTF-8". If this
     * round trip is stable, the chars of the converted string that also show up in the
     * char statistic of the input are counted. The byte order with more hits wins;
     * an equal count (also "0 === 0") means "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>Return false directly if "UTF8::is_binary()"
     *                                     (strict mode) does not classify the input as binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per byte order (LE is checked first, as before)
        $hits = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $hits[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the char statistic of the input is only built once, and only if it is needed
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys (the chars) are of interest. The result of a method call cannot
            // be used as a reference target, so we iterate by value.
            foreach (self::count_chars($test3) as $test3char => $ignoredCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        if ($hits['UTF-32BE'] !== $hits['UTF-32LE']) {
            if ($hits['UTF-32LE'] > $hits['UTF-32BE']) {
                return 1;
            }

            return 2;
        }

        return false;
    }
3528
3529
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * An array is valid only if every element is valid; an empty string is valid.
     *
     * If "UTF8::pcre_utf8_support()" returns true, the check is done by PCRE via the "/u" modifier;
     * otherwise the string is validated byte by byte with a small state machine.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // the elements are only read, so there is no need to iterate by reference
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // From here on we work on bytes: non-string scalars are checked via their string form,
        // values that cast to an empty string (e.g. null) stay "valid" as before.
        $str = (string) $str;
        if ($str === '') {
            return true;
        }

        // NOTE(review): this condition used to be "!== true", so the "/u" fast path was only taken
        // when PCRE reported *no* UTF-8 support; assumes "pcre_utf8_support()" returns true when
        // the "/u" modifier is usable - confirm against its implementation.
        if (self::pcre_utf8_support() === true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence: mUcs4 now contains the final
                // Unicode code point.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        return true;
    }
3684
3685
    /**
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" before it is handed to the
     * native "json_decode()" function.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        The native function only works with UTF-8 encoded strings.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>.
     *                        </p>
     *
     * @throws \RuntimeException if "ext-json" is not installed
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filteredJson, $assoc, $depth, $options);

        return $decoded;
    }
3735
3736
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" before it is handed to the
     * native "json_encode()" function.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of the native JSON encode options, e.g.:
     *                       <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if "ext-json" is not installed
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filteredValue, $options, $depth);

        return $encoded;
    }
3785
3786
    /**
     * Checks whether JSON is available on the server.
     *
     * The check only looks for the native "json_decode()" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $jsonDecodeExists = \function_exists('json_decode');

        return $jsonDecodeExists;
    }
3796
3797
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without a language and without "$tryToKeepStringLength" the native
     * "mb_strtolower()" is used for the first char; every other combination goes
     * through "UTF8::strtolower()".
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // every non "UTF-8" charset is handled by the encoding-aware helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $firstChar = (string) \mb_substr($str, 0, 1);

        if ($lang === null && $tryToKeepStringLength === false) {
            // no special cases requested: the native function is enough
            $head = \mb_strtolower($firstChar);
        } else {
            $head = self::strtolower($firstChar, $encoding, false, $lang, $tryToKeepStringLength);
        }

        return $head . $tail;
    }
3853
3854
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the result of "UTF8::lcfirst()"
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCasedFirst = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowerCasedFirst;
    }
3876
3877
    /**
     * Lowercase the first char of all words in the string.
     *
     * The string is split via "UTF8::str_to_words()", every word that is not listed in
     * "$exceptions" is passed to "UTF8::lcfirst()" and the parts are glued together again.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Only a really empty string is returned early: a loose "!$str" check would also
        // swallow the input "0" and return an empty string for it.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // Skipping falsy parts (e.g. "" or "0") is safe: lowercasing would not change them.
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }
        // break the reference to the last element
        unset($word);

        return \implode('', $words);
    }
3923
3924
    /**
     * Alias of "UTF8::lcfirst()": every argument is forwarded unchanged and the result is returned as-is.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Forwarded to lcfirst(). Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Forwarded to lcfirst().</p>
     * @param string|null $lang                  [optional] <p>Forwarded to lcfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Forwarded to lcfirst().</p>
     *
     * @return string the value returned by lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowered;
    }
3946
3947
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * The characters from $chars are quoted and placed into a regex character class that is anchored
     * at the start of the string. Without $chars (null or empty string) leading whitespace ("\s")
     * is stripped instead.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison: a truthiness check would treat the char list "0" as "not given".
        if ($chars !== null && $chars !== '') {
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '^[\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Fallback without mbstring: PCRE with "/" as delimiter.
        return self::regex_replace($str, $pattern, '', '', '/');
    }
3975
3976
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string first; the code points are then collected
     * via "UTF8::codepoints()" and the largest one is converted back with "UTF8::chr()".
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, null if no code point was found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) < 1) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
3998
3999
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character sizes come from "UTF8::chr_size_list()".
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the largest entry of the size list, 0 if the list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
4016
4017
    /**
     * Checks whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4027
4028
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string first; the code points are then collected
     * via "UTF8::codepoints()" and the smallest one is converted back with "UTF8::chr()".
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, null if no code point was found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) < 1) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
4050
4051
    /**
     * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed the value returned by normalize_encoding()
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4067
4068
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: fixed shortcuts (UTF-8, CP850, HTML-ENTITIES), the per-request cache, the list
     * of known encodings and finally an alias table that is matched against the upper-cased name
     * with every char except letters, digits and whitespace removed. A name that matches nothing
     * is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // '' and '0' are "no encoding given"; '1' / '0' may also arrive from non "strict_types" callers
        // that passed a bool.
        if (\in_array($encoding, ['', '0', '1'], true)) {
            return $fallback;
        }

        // shortcuts for the most common names, resolved before cache and data lookup
        $shortcuts = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
        ];
        if (isset($shortcuts[$encoding])) {
            return $shortcuts[$encoding];
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $requestedName = $encoding;
        $encoding = \strtoupper($encoding);
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // every alias target is a non-empty string, so isset() is all that is needed here
        if (isset($aliases[$aliasKey])) {
            $encoding = $aliases[$aliasKey];
        }

        // cache under the name the caller actually used
        $STATIC_NORMALIZE_ENCODING_CACHE[$requestedName] = $encoding;

        return $encoding;
    }
4211
4212
    /**
     * Standardize line ending to unix-like: "\r\n" and a lone "\r" both become "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // strtr() tries the longest key first, so "\r\n" collapses to a single "\n".
        return \strtr($str, ["\r\n" => "\n", "\r" => "\n"]);
    }
4223
4224
    /**
     * Normalize some MS Word special characters.
     *
     * Typographic quotes, guillemets, dashes and the ellipsis (given as UTF-8 byte sequences)
     * are replaced by their plain ASCII look-alikes.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \str_replace(\array_keys($replacements), \array_values($replacements), $str);
    }
4275
4276
    /**
     * Normalize the whitespace.
     *
     * Every entry of the class' whitespace table is replaced by a plain space. Unless
     * $keepBidiUnicodeControls is set, every entry of the bidi-control table is removed first.
     * The prepared replacement lists are cached in static variables.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        // one cached list per value of $keepNonBreakingSpace (0 or 1)
        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4317
4318
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Resolution order: static result cache, the "ord" lookup table, "IntlChar::ord()" (when
     * available) and finally manual decoding of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache key is built from the character as passed in, before any re-encoding below
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];

            return $CHAR_CACHE[$cacheKey];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                $CHAR_CACHE[$cacheKey] = $intlCode;

                return $CHAR_CACHE[$cacheKey];
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (up to) four byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode): the lead byte value itself
            $codePoint = $lead;
        }

        $CHAR_CACHE[$cacheKey] = $codePoint;

        return $CHAR_CACHE[$cacheKey];
    }
4396
4397
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()" this method never places variables into the current scope;
     *          the parsed data is only available via the second parameter.
     *
     * Uses "mb_parse_str()" when mbstring support is flagged as available, otherwise "parse_str()".
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4429
4430
    /**
     * Checks if the "u" modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier; any warning is silenced on purpose.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchResult = @\preg_match('//u', '');

        return (bool) $matchResult;
    }
4442
4443
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is resolved to a code point as follows: a string of decimal digits is used as
     * the number itself, a string of hexadecimal digits is converted via "UTF8::hex_to_int()" and
     * anything else is treated as a character and passed to "UTF8::ord()". Note that a single
     * hex letter such as "a" therefore counts as a hexadecimal number, not as a character.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is flagged as not available
     *
     * @return string[] empty if a boundary is falsy or resolves to code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Shared boundary resolver. The ctype functions always get a string: passing other types
        // is deprecated since PHP 8.1 and integers would be interpreted as ASCII codes.
        $toCodePoint = static function ($boundary): int {
            $asString = (string) $boundary;

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit($asString)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($asString)) {
                return (int) self::hex_to_int($asString);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            static function (int $i): string {
                return (string) self::chr($i);
            },
            \range($start, $end)
        );
    }
4494
4495
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * One decode pass is: to_utf8() -> html_entity_decode() -> rawurldecode() -> fix_simple_utf8().
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: repeat the decode pass until the string no longer changes,
     *                             false: run exactly one decode pass.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // rewrite "%uXXXX" escapes as numeric HTML entities, so that the entity decoder handles them
        $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscapePattern, $str)) {
            $str = (string) \preg_replace($unicodeEscapePattern, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always decode at least once: previously nothing at all was decoded when $multi_decode
        // was false. Further passes only happen in multi-decode mode and while the string changes.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4547
4548
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The final regex is built as: delimiter + pattern + delimiter + "u" + options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used. The value "msr" is mapped to "ms".</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback to "/" for an empty (falsy) delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4581
4582
    /**
     * Alias of "UTF8::remove_bom()": the argument is forwarded unchanged.
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str
     *
     * @return string the value returned by remove_bom()
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4597
4598
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Walks over the class' BOM table (BOM bytes => byte length) and cuts every BOM that the
     * string currently starts with.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingLength = \strlen($str);

        foreach (self::$BOM as $bomString => $bomByteLength) {
            if (\strpos($str, $bomString, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $bomByteLength, $remainingLength);
            if ($stripped === false) {
                // nothing is left after the BOM
                return '';
            }

            $remainingLength -= (int) $bomByteLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4627
4628
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "   ") is collapsed into a single occurrence.
     * A value that is neither a string nor an array leaves $str untouched.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $needles = \is_string($what) === true ? [$what] : $what;

        if (\is_array($needles) !== true) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($needles as $needle) {
            $repeatedNeedle = '/(' . \preg_quote($needle, '/') . ')+/';
            $str = (string) \preg_replace($repeatedNeedle, $needle, $str);
        }

        return $str;
    }
4651
4652
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str
     * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which should
     *                              not be stripped. Default: '' (strip every tag)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4666
4667
    /**
     * Remove all breaks [<br> | <br /> | \r\n | \r | \n] from the string.
     *
     * Only real "br" tags are matched (case-insensitive, with optional attributes / self-closing
     * slash); other tags that merely start with "br" are left alone.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // NOTE: the former pattern started with a stray "/" (it swallowed a slash in front of "\r\n")
        // and matched "<br" + anything, which also removed tags such as "<brand>".
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
4679
4680
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * Removed are all control characters except newline (dec 10), carriage return (dec 13) and
     * horizontal tab (dec 09), plus DEL (dec 127). With $url_encoded their percent-encoded forms
     * are removed too; hex digits of percent-encodings are matched case-insensitively.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded variants. Default: true</p>
     * @param string $replacement [optional] <p>Replacement for every removed sequence. Default: ''</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        if ($url_encoded) {
            // "%0B" and "%0b" are the same octet, hence the "i" modifier
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
            $non_displayables[] = '/%7f/i'; // url encoded 127
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // repeat until stable: removing one sequence can join the pieces of another one (e.g. "%%0000")
        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4713
4714
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix is detected with a byte-wise comparison; the cut itself is done by character
     * count in the given encoding.
     *
     * @param string $str
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a truthiness check would make the prefix "0" impossible to remove.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4745
4746
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix check is a byte-wise comparison; only the resulting length
     * is computed in characters of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a truthiness check would treat the
        // suffix "0" as "no suffix given" and never remove it.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4778
4779
    /**
     * Replaces every occurrence of $search in $str with $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // case-sensitive -> native byte-wise replace, otherwise the class' own "str_ireplace()"
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4801
4802
    /**
     * Replaces every occurrence of each element of $search in $str with $replacement.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string(s) to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        // case-sensitive -> native byte-wise replace, otherwise the class' own "str_ireplace()"
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4824
4825
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character.</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            // an empty replacement is expressed as "none" for "mb_substitute_character()"
            $substitute = $replacementChar === '' ? 'none' : $replacementChar;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // swap the substitute character only for this one conversion and restore it afterwards
            $previousSubstitute = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previousSubstitute);
        }

        // both search entries are replaced by the same replacement string
        return \str_replace(["\xEF\xBF\xBD", '�'], $replacementChar, $str);
    }
4871
4872
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or '' strips whitespace.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit null / '' checks: a truthiness check would treat the
        // character list "0" as "not given" and strip whitespace instead.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = '[' . $chars . ']+$';
        } else {
            $pattern = '[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4900
4901
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes one "key - value" line per entry of the internal support-array,
     * wrapped in a "<pre>" element.
     */
    public static function showSupport()
    {
        echo '<pre>';

        foreach (self::$SUPPORT as $supportKey => $supportValue) {
            echo $supportKey, ' - ', \print_r($supportValue, true), "\n<br>";
        }

        echo '</pre>';
    }
4913
4914
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched, but only on request
        $returnUnchanged = $keepAsciiChars === true && self::is_ascii($char) === true;

        return $returnUnchanged
            ? $char
            : ('&#' . self::ord($char, $encoding) . ';');
    }
4939
4940
    /**
     * Replaces every run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
4958
4959
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is passed, every element is split on its own and the
     * array (with its keys preserved) is returned.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // Write back via the key instead of iterating by reference,
            // so no dangling reference to the last element is left behind.
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: one "mb_substr()" call per character
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: let PCRE split the string in one pass
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand;
            // a lead byte without valid continuation bytes is not added to the result

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1-byte sequence (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2-byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // group the single characters into chunks of $length and glue each chunk together;
            // the callback takes its argument by value, because "array_map()" passes values
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
5098
5099
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);

        // drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_strtoupper()" is only used if no language- or length-handling was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        /**
         * Upper-case helper shared by both callbacks below.
         *
         * @param string $chars
         * @param bool   $clean
         *
         * @return string
         */
        $toUpper = static function (string $chars, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($chars, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chars);
            }

            return \mb_strtoupper($chars, $encoding);
        };

        // remove the separators and upper-case the character that follows them (if any)
        $str = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return isset($match[1]) ? $toUpper($match[1], false) : '';
            },
            $str
        );

        // upper-case the character that follows a run of digits (the digits are part of the match)
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $str
        );
    }
5184
5185
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        // 1.) collapse the whitespace, 2.) process the space-separated parts, 3.) process the dash-separated parts
        $collapsed = self::collapse_whitespace($str);
        $spaceParts = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($spaceParts, '-');
    }
5203
5204
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // byte-wise search for the case-sensitive check, multibyte-aware search otherwise
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5226
5227
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty haystack, an empty needle list or an empty needle always
     * results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against '' explicitly, so that the needle "0" is searched for, too
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // Fail on the first missing needle, but keep checking the
            // remaining ones while every needle so far was found.
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5262
5263
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped; an empty haystack or an empty needle list
     * always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against '' explicitly, so that the needle "0" is searched for, too
            $needle = (string) $needle;
            if ($needle === '') {
                // an empty needle can never match, but a later needle still can
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // Succeed on the first hit, otherwise try the next needle.
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5299
5300
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // "dasherize" is "str_delimit()" with a fixed delimiter
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5314
5315
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $hasMbString = self::$SUPPORT['mbstring'] === true;
        $str = \trim($str);

        // 1.) mark every upper-case A-Z that is not at a word boundary with a dash
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\B([A-Z])', '-\1', $str);
        } else {
            $str = (string) \preg_replace('/\B([A-Z])/u', '-\1', $str);
        }

        // 2.) lower-case the string (plain "mb_strtolower()" only for the simple UTF-8 case)
        if (
            $lang === null
            &&
            $tryToKeepStringLength === false
            &&
            $encoding === 'UTF-8'
        ) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // 3.) collapse every run of dashes, underscores and whitespace into the delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[-_\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5366
5367
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            switch (self::is_utf16($str, false)) {
                case 1:
                    return 'UTF-16LE';
                case 2:
                    return 'UTF-16BE';
            }

            switch (self::is_utf32($str, false)) {
                case 1:
                    return 'UTF-32LE';
                case 2:
                    return 'UTF-32BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding(
                $str,
                [
                    'ISO-8859-1',
                    'ISO-8859-2',
                    'ISO-8859-3',
                    'ISO-8859-4',
                    'ISO-8859-5',
                    'ISO-8859-6',
                    'ISO-8859-7',
                    'ISO-8859-8',
                    'ISO-8859-9',
                    'ISO-8859-10',
                    'ISO-8859-13',
                    'ISO-8859-14',
                    'ISO-8859-15',
                    'ISO-8859-16',
                    'WINDOWS-1251',
                    'WINDOWS-1252',
                    'WINDOWS-1254',
                    'CP932',
                    'CP936',
                    'CP950',
                    'CP866',
                    'CP850',
                    'CP51932',
                    'CP50220',
                    'CP50221',
                    'CP50222',
                    'ISO-2022-JP',
                    'ISO-2022-KR',
                    'JIS',
                    'JIS-ms',
                    'EUC-CN',
                    'EUC-JP',
                ],
                true
            );
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            // the list of encodings is loaded once and kept in the static property
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // a candidate is accepted if converting the string to the same encoding leaves it unchanged
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5488
5489
    /**
     * Check if the string ends with the given substring (byte-wise, case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // cut as many bytes from the end of the haystack as the needle is long and compare them
        $needleLength = \strlen($needle);
        $tail = \substr($haystack, -$needleLength);

        return $tail === $needle;
    }
5501
5502
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply never enters the loop
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5526
5527
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // an empty $substring is never looked up, it is just (harmlessly) prepended
        $hasPrefix = $substring !== '' && \strpos($str, $substring) === 0;

        return $hasPrefix ? $str : $substring . $str;
    }
5548
5549
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // nothing to do if both strings are non-empty and $str already ends with $substring
        if (
            $str !== ''
            &&
            $substring !== ''
            &&
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return $str;
        }

        return $str . $substring;
    }
5571
5572
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' is removed first, afterwards the remaining underscores become spaces
        $search = ['_id', '_'];
        $replace = ['', ' '];

        $humanized = \str_replace($search, $replace, $str);

        return self::ucfirst(\trim($humanized));
    }
5596
5597
    /**
     * Case-insensitive "ends with" check.
     *
     * An empty $haystack or an empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Take as many trailing bytes as the needle has and compare them via self::strcasecmp().
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5613
5614
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * Each candidate is checked with self::str_iends_with(); the first hit
     * short-circuits. An empty list yields false.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is modified, so a by-reference loop only
        // forces an array copy and leaves a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5638
5639
    /**
     * Finds the first occurrence of $needle in $str by delegating to
     * self::stripos(); all arguments are forwarded unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $position = self::stripos($str, $needle, $offset, $encoding);

        return $position;
    }
5665
5666
    /**
     * Finds the last occurrence of $needle in $str by delegating to
     * self::strripos(); all arguments are forwarded unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $position = self::strripos($str, $needle, $offset, $encoding);

        return $position;
    }
5693
5694
    /**
     * Finds the first occurrence of $needle in $str by delegating to
     * self::strpos(); all arguments are forwarded unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $position = self::strpos($str, $needle, $offset, $encoding);

        return $position;
    }
5720
5721
    /**
     * Finds the last occurrence of $needle in $str by delegating to
     * self::strrpos(); all arguments are forwarded unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $position = self::strrpos($str, $needle, $offset, $encoding);

        return $position;
    }
5748
5749
    /**
     * Splices $substring into $str at character position $index.
     *
     * When $index is greater than the character length of $str, the input
     * is returned untouched.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // Fast path: plain mbstring calls, no encoding normalization needed.
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5788
5789
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search term is quoted and turned into a `/.../ui` pattern, then
     * the work is done by a single preg_replace() call. An empty search term
     * is mapped to a pattern that can never match.
     *
     * NOTE: $replace is passed to preg_replace() as-is, so `$n` / `\n`
     * sequences inside it are interpreted as back-references.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s), forwarded to preg_replace().
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Build the pattern list in a fresh array (same keys, same order) instead of
        // rewriting $search through a reference that would be left dangling.
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/' // never matches: an empty needle must not replace anything
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // Let preg_replace() write the number of replacements straight into $count
        // rather than recycling $replace as the by-reference output variable.
        $subject = \preg_replace($patterns, $replace, $subject, -1, $count);

        return $subject;
    }
5833
5834
    /**
     * Replaces a leading, case-insensitively matched $search with $replacement.
     *
     * With an empty $search the replacement is appended to $str. When $str
     * does not start with $search, $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // Covers the empty-$str cases too: '' . $replacement is just $replacement.
            return $str . $replacement;
        }

        if ($str === '') {
            // A non-empty $search cannot match inside an empty string.
            return '';
        }

        $startsWithSearch = \stripos($str, $search) === 0;
        if (!$startsWithSearch) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5865
5866
    /**
     * Replaces a trailing, case-insensitively matched $search with $replacement.
     *
     * With an empty $search the replacement is appended to $str. When $str
     * does not end with $search (including when $search is longer than
     * $str), $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // Covers the empty-$str cases too: '' . $replacement is just $replacement.
            return $str . $replacement;
        }

        $searchLength = \strlen($search);
        $offset = \strlen($str) - $searchLength;

        // A needle longer than the haystack can never be a suffix. Bail out here:
        // a negative offset outside the haystack makes stripos() emit a warning
        // (PHP 7) or throw a ValueError (PHP 8).
        if ($offset < 0) {
            return $str;
        }

        // Searching from $offset leaves exactly $searchLength bytes, so any hit is a suffix hit.
        if (\stripos($str, $search, $offset) !== false) {
            $str = \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
5897
5898
    /**
     * Case-insensitive "starts with" check, based on self::stripos().
     *
     * An empty $haystack or an empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
5914
5915
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * Each candidate is checked with self::str_istarts_with(); the first hit
     * short-circuits. An empty $str or an empty list yields false.
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // Iterate by value: nothing is modified, so a by-reference loop only
        // forces an array copy and leaves a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5943
5944
    /**
     * Gets the substring after the first (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the separator up in the same encoding that is used for slicing below,
        // otherwise the character offset and the substring call can disagree.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5978
5979
    /**
     * Gets the substring after the last (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the separator up in the same encoding that is used for slicing below,
        // otherwise the character offset and the substring call can disagree.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6013
6014
    /**
     * Gets the substring before the first (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the separator up in the same encoding that is used for slicing below,
        // otherwise the character offset and the substring call can disagree.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6040
6041
    /**
     * Gets the substring before the last (case-insensitive) occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Generic path: go through the encoding-aware class helpers.
            $position = self::strripos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        // UTF-8 fast path: plain mbstring calls.
        $position = \mb_strripos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
6072
6073
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * Thin wrapper around self::stristr() that turns a <strong>false</strong>
     * result, as well as empty input, into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6109
6110
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * Thin wrapper around self::strrichr() that turns a <strong>false</strong>
     * result, as well as empty input, into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6141
6142
    /**
     * Returns the last $n characters of the string.
     *
     * An empty $str or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // A negative start offset counts from the end of the string.
        $start = -$n;

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, $start, null, $encoding);
        }

        return (string) \mb_substr($str, $start);
    }
6165
6166
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned as they are;
     * longer ones are cut so that the kept part plus $strAddOn is $length
     * characters long.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on inside the limit.
            $keep = $length - (int) self::strlen($strAddOn);

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Reserve room for the add-on inside the limit.
        $keep = $length - (int) self::strlen($strAddOn);

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6203
6204
    /**
     * Limit the number of characters in a string, cutting at a word boundary.
     *
     * The string is cut to $length characters and the trailing (possibly
     * partial) word is dropped before $strAddOn is appended. If nothing is
     * left after dropping that word, the first $length - 1 characters are
     * used instead.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit falls exactly on a space: cut right before it.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            $str = \mb_substr($str, 0, $length);

            // Drop the last space-separated chunk, i.e. the word that got cut.
            $wholeWords = \implode(' ', \array_slice(\explode(' ', $str), 0, -1));
            if ($wholeWords !== '') {
                return $wholeWords . $strAddOn;
            }

            return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right before it.
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        $str = self::substr($str, 0, $length, $encoding);
        if ($str === false) {
            return $strAddOn;
        }

        // Drop the last space-separated chunk, i.e. the word that got cut.
        $wholeWords = \implode(' ', \array_slice(\explode(' ', $str), 0, -1));
        if ($wholeWords !== '') {
            return $wholeWords . $strAddOn;
        }

        return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
    }
6268
6269
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are walked character by character from the start until
     * the first mismatch (or until the shorter string ends).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            for ($pos = 0; $pos < $limit; ++$pos) {
                $char = self::substr($str, $pos, 1, $encoding);

                if ($char === false || $char !== self::substr($otherStr, $pos, 1, $encoding)) {
                    break;
                }

                $prefix .= $char;
            }

            return $prefix;
        }

        // UTF-8 fast path: plain mbstring calls.
        $limit = (int) \min(
            \mb_strlen($str),
            \mb_strlen($otherStr)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $char = \mb_substr($str, $pos, 1);

            if ($char === false || $char !== \mb_substr($otherStr, $pos, 1)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
6327
6328
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses the classic dynamic-programming approach
     * (http://en.wikipedia.org/wiki/Longest_common_substring_problem).
     * Every character of both strings is extracted exactly once, and only
     * the previous row of the DP table is kept.
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with its $str being the longest common substring
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Decided after normalization, exactly like the per-branch checks it replaces.
        $isUtf8 = $encoding === 'UTF-8';

        // Extract the characters of $otherStr once (1-based, matching the DP indices)
        // instead of re-slicing the string in every inner-loop iteration.
        $otherChars = [];
        for ($j = 1; $j <= $otherLength; ++$j) {
            $otherChars[$j] = $isUtf8
                ? \mb_substr($otherStr, $j - 1, 1)
                : self::substr($otherStr, $j - 1, 1, $encoding);
        }

        $len = 0; // length of the best common run found so far
        $end = 0; // 1-based position in $str where that run ends

        // $previousRow[$j] = length of the common run ending at ($i - 1, $j).
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            // Loop-invariant for the inner loop, so slice it once per row.
            $strChar = $isUtf8
                ? \mb_substr($str, $i - 1, 1)
                : self::substr($str, $i - 1, 1, $encoding);

            $currentRow = [0 => 0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j]) {
                    $run = $previousRow[$j - 1] + 1;
                    $currentRow[$j] = $run;

                    // Strictly greater: on ties the earliest run wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $currentRow[$j] = 0;
                }
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6412
6413
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are walked character by character from the end until the
     * first mismatch (or until the shorter string is exhausted).
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($otherStr === '' || $str === '') {
            return '';
        }

        $suffix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            // $back is the distance from the end of the string (1 = last character).
            for ($back = 1; $back <= $limit; ++$back) {
                $char = self::substr($str, -$back, 1, $encoding);

                if ($char === false || $char !== self::substr($otherStr, -$back, 1, $encoding)) {
                    break;
                }

                $suffix = $char . $suffix;
            }

            return $suffix;
        }

        // UTF-8 fast path: plain mbstring calls.
        $limit = (int) \min(
            \mb_strlen($str, $encoding),
            \mb_strlen($otherStr, $encoding)
        );

        // $back is the distance from the end of the string (1 = last character).
        for ($back = 1; $back <= $limit; ++$back) {
            $char = \mb_substr($str, -$back, 1);

            if ($char === false || $char !== \mb_substr($otherStr, -$back, 1)) {
                break;
            }

            $suffix = $char . $suffix;
        }

        return $suffix;
    }
6474
6475
    /**
     * Checks whether $str matches the supplied regular expression.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * (UTF-8) modifier, so it must be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @return bool <p>True on a match; false on no match or when the regex engine reports an error.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
6487
6488
    /**
     * Tells whether a character exists at the given offset.
     *
     * A non-negative offset counts from the first character (0-based); a
     * negative offset counts from the last character (-1 is the last one).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        return $offset >= 0
            ? $offset < $charCount
            : \abs($offset) <= $charCount;
    }
6510
6511
    /**
     * Returns the character at the given index.
     *
     * A non-negative index counts from the first character (0-based); a
     * negative index counts from the last character (-1 is the last one).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string <p>The character at the specified index.</p>
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check has to use the same encoding as the lookup below.
        // str_offset_exists() applies exactly the index rules described above.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6540
6541
    /**
     * Pads a string to the given length (counted in characters) with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string
     * is empty, or when the input already has more than $pad_length characters.
     * For STR_PAD_BOTH the left side receives floor(missing / 2) characters and
     * the right side ceil(missing / 2) characters.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of "left", "right", "both"
     *
     * @return string <p>The padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            if ($pad_length < $strLength) {
                return $str;
            }

            $missing = $pad_length - $strLength;

            if ($pad_type === \STR_PAD_BOTH) {
                $leftLength = (int) \floor($missing / 2);
                $rightLength = (int) \ceil($missing / 2);

                $left = (string) \mb_substr(\str_repeat($pad_string, $leftLength), 0, $leftLength);
                $right = (string) \mb_substr(\str_repeat($pad_string, $rightLength), 0, $rightLength);

                return $left . $str . $right;
            }

            // one-sided padding: repeat the pad string often enough, then cut to size
            $repeats = (int) \ceil($missing / (int) \mb_strlen($pad_string));
            $padding = (string) \mb_substr(\str_repeat($pad_string, $repeats), 0, $missing);

            // every integer other than STR_PAD_LEFT / STR_PAD_BOTH pads on the right
            return $pad_type === \STR_PAD_LEFT ? $padding . $str : $str . $padding;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $strLength = (int) self::strlen($str, $encoding);
        if ($pad_length < $strLength) {
            return $str;
        }

        $missing = $pad_length - $strLength;

        if ($pad_type === \STR_PAD_BOTH) {
            $leftLength = (int) \floor($missing / 2);
            $rightLength = (int) \ceil($missing / 2);

            $left = (string) self::substr(\str_repeat($pad_string, $leftLength), 0, $leftLength, $encoding);
            $right = (string) self::substr(\str_repeat($pad_string, $rightLength), 0, $rightLength, $encoding);

            return $left . $str . $right;
        }

        $repeats = (int) \ceil($missing / (int) self::strlen($pad_string, $encoding));
        $padding = (string) self::substr(\str_repeat($pad_string, $repeats), 0, $missing, $encoding);

        return $pad_type === \STR_PAD_LEFT ? $padding . $str : $str . $padding;
    }
6703
6704
    /**
     * Pads both sides of the string until it reaches the given length.
     *
     * Shortcut for str_pad() with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>String with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padType = \STR_PAD_BOTH;

        return self::str_pad($str, $length, $padStr, $padType, $encoding);
    }
6723
6724
    /**
     * Pads the beginning of the string until it reaches the given length.
     *
     * Shortcut for str_pad() with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>String with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padType = \STR_PAD_LEFT;

        return self::str_pad($str, $length, $padStr, $padType, $encoding);
    }
6743
6744
    /**
     * Pads the end of the string until it reaches the given length.
     *
     * Shortcut for str_pad() with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>String with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padType = \STR_PAD_RIGHT;

        return self::str_pad($str, $length, $padStr, $padType, $encoding);
    }
6763
6764
    /**
     * Repeats a string.
     *
     * The input is first passed through self::filter() and the filtered value
     * is then repeated with PHP's native str_repeat().
     *
     * @param string $str        <p>The string to be repeated.</p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6788
6789
    /**
     * Replaces all occurrences of the search string with the replacement string.
     *
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed <p>A string or an array with the replaced values.</p>
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /** @psalm-suppress PossiblyNullArgument */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6827
6828
    /**
     * Replaces $search with $replacement, but only if $str begins with $search.
     *
     * The comparison is byte-wise. When $search is empty, $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string <p>The string after the replacement.</p>
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // an empty search term: the replacement is appended
        // (for an empty $str this is simply the replacement itself)
        if ($search === '') {
            return $str . $replacement;
        }

        if (\strpos($str, $search) !== 0) {
            // $str does not start with $search (this includes an empty $str)
            return $str;
        }

        $remainder = \substr($str, \strlen($search));

        return $replacement . $remainder;
    }
6859
6860
    /**
     * Replaces $search with $replacement, but only if $str ends with $search.
     *
     * The comparison is byte-wise. When $search is empty, $replacement is
     * appended to $str. When $search is longer than $str, $str is returned
     * unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string <p>The string after the replacement.</p>
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // A search term longer than the input can never be its ending. Checking
        // this up front avoids an out-of-range offset (warning on PHP 7,
        // ValueError on PHP 8) in the comparison below.
        if ($searchLength > \strlen($str)) {
            return $str;
        }

        if (\substr($str, -$searchLength) === $search) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6891
6892
    /**
     * Replaces the first occurrence of $search in $subject with $replace.
     *
     * If $search does not occur, $subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);

        if ($firstPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $firstPos, $searchLength);
    }
6914
6915
    /**
     * Replaces the last occurrence of $search in $subject with $replace.
     *
     * If $search does not occur, $subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $lastPos = self::strrpos($subject, $search);

        if ($lastPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $lastPos, $searchLength);
    }
6939
6940
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Nothing to shuffle. Without this guard range(0, -1) would produce the
        // bogus index list [0, -1] for an empty string.
        if ($str === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $length = $isUtf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // shuffle the character positions, then rebuild the string in that order
        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        $shuffledStr = '';

        foreach ($indexes as $index) {
            $char = $isUtf8
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding);

            if ($char !== false) {
                $shuffledStr .= $char;
            }
        }

        return $shuffledStr;
    }
6986
6987
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end.
     *
     * If $end is omitted, the rest of the string is extracted. If $end is
     * negative, it is counted from the end of the string. A non-negative $end
     * that is not greater than $start yields an empty string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring, or whatever the underlying
     *                      substring function returns for an out-of-range $start
     *                      (possibly <b>FALSE</b>).</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end !== null && $end >= 0) {
            if ($end <= $start) {
                return '';
            }

            // both bounds are known: no need to measure the string
            $length = $end - $start;
        } else {
            $total = $isUtf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            // no end: take (up to) the whole length; negative end: count from the end
            $length = $end === null ? $total : $total + $end - $start;
        }

        if ($isUtf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7036
7037
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every character
     * matched by the marker regex is prefixed with "_" (digits are also
     * followed by "_", everything else is lower-cased); whitespace runs become
     * "_", repeated "_" are collapsed, and leading / trailing "_" and
     * whitespace are trimmed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>String in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $normalized = self::normalize_whitespace($str);
        $str = \str_replace('-', '_', $normalized);

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // NOTE: the character class below also contains a literal "|"
        $str = (string) \preg_replace_callback(
            '/([\d|A-Z])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $token = $matches[1];

                // a digit is fenced with "_" on both sides
                if ((string) (int) $token === $token) {
                    return '_' . $token . '_';
                }

                $lowered = $encoding === 'UTF-8'
                    ? \mb_strtolower($token)
                    : self::strtolower($token, $encoding);

                return '_' . $lowered;
            },
            $str
        );

        $patterns = [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ];
        $replacements = [
            '_',
            '',
            '_',
        ];
        $str = (string) \preg_replace($patterns, $replacements, $str);

        // trim leading & trailing "_" + whitespace
        $withoutUnderscores = \trim($str, '_');

        return \trim($withoutUnderscores);
    }
7101
7102
    /**
     * Sort all characters according to code points.
     *
     * The string is turned into a list via self::codepoints(), the list is
     * sorted by value and handed to self::string() to build the result.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string <p>String of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves exactly one entry per distinct value
            $flipped = \array_flip($codePoints);
            $codePoints = \array_flip($flipped);
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7127
7128
    /**
     * Alias for "UTF8::str_split()": all arguments are forwarded unchanged.
     *
     * @see UTF8::str_split()
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
7146
7147
    /**
     * Splits the string with the provided pattern and returns the pieces as
     * an array of strings. An optional integer $limit truncates the result
     * to at most that many pieces.
     *
     * With mbstring support the pattern is handed to mb_split() as a regular
     * expression; in the fallback path it is escaped with preg_quote() and
     * therefore matched literally.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] <p>An array of strings; empty if $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() signals failure with false; mirror the fallback path
            // below and report "no pieces" instead of leaking a non-array.
            if ($parts === false) {
                return [];
            }

            if ($limit >= 0) {
                // keep only the first $limit pieces
                return \array_filter(
                    $parts,
                    static function () use (&$limit): bool {
                        return --$limit >= 0;
                    }
                );
            }

            return $parts;
        }

        // Ask preg_split() for one extra piece so that the unsplit remainder
        // lands in an element of its own, which is dropped again below.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7201
7202
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle is
     * treated as a prefix of every string, so the result is the same on all
     * PHP versions and no "empty needle" warning is raised.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // Only the first strlen($needle) bytes are compared, so a long haystack
        // is never scanned for a match further in.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7214
7215
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * An empty $str or an empty list of substrings always yields false.
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool <p>Whether or not $str starts with one of the substrings.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7243
7244
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Position and length come from the encoding-aware helpers of this
        // class, so the cut is made with the matching helper as well (same as
        // in str_substr_after_last_separator()).
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7283
7284
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * Yields an empty string if either argument is empty or if the separator
     * cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $pos = self::strrpos($str, $separator, 0, $encoding);
            if ($pos === false) {
                return '';
            }

            $start = $pos + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $start, null, $encoding);
        }

        // UTF-8: use the mb_* functions directly.
        $pos = \mb_strrpos($str, $separator);
        if ($pos === false) {
            return '';
        }

        $start = $pos + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }
7323
7324
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * Yields an empty string if either argument is empty or if the separator
     * cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $pos = self::strpos($str, $separator, 0, $encoding);

            return $pos === false
                ? ''
                : (string) self::substr($str, 0, $pos, $encoding);
        }

        // UTF-8: use the mb_* functions directly.
        $pos = \mb_strpos($str, $separator);

        return $pos === false
            ? ''
            : (string) \mb_substr($str, 0, $pos);
    }
7367
7368
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Yields an empty string if either argument is empty or if the separator
     * cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $pos = self::strrpos($str, $separator, 0, $encoding);
            if ($pos === false) {
                return '';
            }

            // the encoding name is normalized only for the final cut
            $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $pos, $normalizedEncoding);
        }

        // UTF-8: use the mb_* functions directly.
        $pos = \mb_strrpos($str, $separator);

        return $pos === false
            ? ''
            : (string) \mb_substr($str, 0, $pos);
    }
7410
7411
    /**
     * Gets the part of $str starting at the first occurrence of "$needle"
     * (needle included), or the part before it when "$beforeNeedle" is true.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle is not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; other encodings use the class wrapper.
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7455
7456
    /**
     * Gets the part of $str starting at the last occurrence of "$needle"
     * (needle included), or the part before it when "$beforeNeedle" is true.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle is not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; other encodings use the class wrapper.
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7500
7501
    /**
     * Wraps $str in $substring: the substring is placed once before and once
     * after the input.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7513
7514
    /**
     * Capitalizes the first letter of every whitespace-separated word and
     * lower-cases the rest of it. Words listed in $ignore (exact,
     * case-sensitive match) are left untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        $isShortcutEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isShortcutEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst) {
            $str = \trim($str);
        }

        if ($cleanUtf8) {
            $str = self::clean($str);
        }

        // Without language rules or length preservation the plain mb_* casing
        // functions are enough; otherwise the class helpers take over.
        $plainMbCasing = $lang === null && !$tryToKeepStringLength;

        $titleizeWord = static function (array $match) use ($ignore, $plainMbCasing, $encoding, $lang, $tryToKeepStringLength): string {
            $word = $match[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if (!$plainMbCasing) {
                $lowered = self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );

                return self::ucfirst(
                    $lowered,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            if ($encoding === 'UTF-8') {
                $first = \mb_substr($word, 0, 1);
                $rest = \mb_substr($word, 1);

                return \mb_strtoupper($first) . \mb_strtolower($rest);
            }

            $first = \mb_substr($word, 0, 1, $encoding);
            $rest = \mb_substr($word, 1, null, $encoding);

            return \mb_strtoupper($first, $encoding) . \mb_strtolower($rest, $encoding);
        };

        return (string) \preg_replace_callback('/([\S]+)/u', $titleizeWord, $str);
    }
7589
7590
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. These words are treated as literal text: they are escaped
     * before being added to the internal "small words" pattern, and empty
     * entries are skipped.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string   $str
     * @param string[] $ignore   <p>An array of words not to capitalize.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // Caller-supplied words are plain text, not regex fragments. Escape them
        // (including the "~" delimiter used below) so that metacharacters cannot
        // change or break the patterns. An empty word would add an empty
        // alternative that matches at every word boundary, so it is dropped.
        $ignoreRx = [];
        foreach ($ignore as $ignoredWord) {
            $ignoredWord = (string) $ignoredWord;
            if ($ignoredWord === '') {
                continue;
            }

            $ignoreRx[] = \preg_quote($ignoredWord, '~');
        }

        // the built-in entries are regex fragments on purpose (look-arounds, optional dot)
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignoreRx
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an all-caps (no lowercase at all) title is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7759
7760
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of $str are read as one big-endian number and written in
     * base 2 without leading zeros. An empty string, or a string made only
     * of NUL bytes, gives "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string a string consisting of the characters "0" and "1"
     */
    public static function str_to_binary(string $str): string
    {
        // Build the bit string byte by byte. \base_convert() goes through a
        // numeric type and silently loses precision for large values, so it
        // cannot be used for anything longer than a few bytes.
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \str_pad(\decbin(\ord($str[$i])), 8, '0', \STR_PAD_LEFT);
        }

        // a number is written without leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7773
7774
    /**
     * Splits a string into lines, using one or two consecutive CR / LF
     * characters as the line separator.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "nothing to split" and for a failed split
        $emptyResult = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $emptyResult;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split('[\r\n]{1,2}', $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $emptyResult;
        }

        $needsFiltering = $removeShortValues !== null || $removeEmptyValues !== false;
        if (!$needsFiltering) {
            return $lines;
        }

        return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
    }
7812
7813
    /**
     * Convert a string into an array of words.
     *
     * The string is split with the word pattern as a captured delimiter, so
     * the unfiltered result alternates between the text around the words and
     * the words themselves.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // result for "nothing to split" and for a failed split
        $emptyResult = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $emptyResult;
        }

        $charList = self::rxClass($charList, '\pL');

        $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $emptyResult;
        }

        $needsFiltering = $removeShortValues !== null || $removeEmptyValues !== false;
        if (!$needsFiltering) {
            return $parts;
        }

        $filtered = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

        // make sure every remaining entry is a string (keys are kept)
        foreach ($filtered as $key => $value) {
            $filtered[$key] = (string) $value;
        }

        return $filtered;
    }
7860
7861
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7876
7877
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If $substring alone already uses up the whole $length, nothing of $str
     * is kept and only $substring is returned (the same rule that
     * str_truncate_safe() applies).
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // already short enough: nothing to cut, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);

                // A negative length would make mb_substr() count from the end
                // of $str and return far more than was asked for.
                if ($length <= 0) {
                    return $substring;
                }

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $length) . $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);

            // same guard as in the UTF-8 branch
            if ($length <= 0) {
                return $substring;
            }
        }

        return ((string) self::substr(
                $str,
                0,
                $length,
                $encoding
            )) . $substring;
    }
7932
7933
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * For an empty $str, a $length of zero or less, or a $substring that
     * leaves no room for any text, only $substring is returned.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            $head = \mb_substr($str, 0, $length);
            if ($head === false) {
                return '';
            }

            // a space exactly at the cut position means no word was split
            $nextSpace = \mb_strpos($str, ' ', $length - 1);
            if ($nextSpace === $length) {
                return $head . $substring;
            }

            // otherwise fall back to the last space inside the kept part
            $lastSpace = \mb_strrpos($head, ' ', 0);
            $cutBack = $lastSpace !== false
                       || ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);
            if ($cutBack) {
                $head = (string) \mb_substr($head, 0, (int) $lastSpace);
            }

            return $head . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // a space exactly at the cut position means no word was split
        $nextSpace = self::strpos($str, ' ', $length - 1, $encoding);
        if ($nextSpace === $length) {
            return $head . $substring;
        }

        // otherwise fall back to the last space inside the kept part
        $lastSpace = self::strrpos($head, ' ', 0, $encoding);
        $cutBack = $lastSpace !== false
                   || ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);
        if ($cutBack) {
            $head = (string) self::substr($head, 0, (int) $lastSpace, $encoding);
        }

        return $head . $substring;
    }
8027
8028
    /**
     * Returns the string as delimited by underscores: this is
     * "UTF8::str_delimit()" called with "_" as the delimiter.
     *
     * @see UTF8::str_delimit()
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
8042
8043
    /**
     * Returns an UpperCamelCase version of the supplied string: the string is
     * camelized with "UTF8::str_camelize()" and its first character is then
     * upper-cased with "UTF8::ucfirst()".
     *
     * $cleanUtf8, $lang and $tryToKeepStringLength are passed to the
     * "ucfirst" step only.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8065
8066
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $upperFirst = self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $upperFirst;
    }
8088
8089
    /**
8090
     * Counts number of words in the UTF-8 string.
8091
     *
8092
     * @param string $str      <p>The input string.</p>
8093
     * @param int    $format   [optional] <p>
8094
     *                         <strong>0</strong> => return a number of words (default)<br>
8095
     *                         <strong>1</strong> => return an array of words<br>
8096
     *                         <strong>2</strong> => return an array of words with word-offset as key
8097
     *                         </p>
8098
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
8099
     *
8100
     * @return int|string[] The number of words in the string
8101
     */
8102 2
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        // The loops below read the words from the odd indexes of this list
        // and treat the even indexes as the text between the words.
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format 2: words keyed by their character offset in the input
        if ($format === 2) {
            $wordsByOffset = [];
            $position = (int) self::strlen($parts[0]);
            for ($index = 1; $index < $partCount; $index += 2) {
                $wordsByOffset[$position] = $parts[$index];

                // advance past the word itself and the part that follows it
                $wordLength = (int) self::strlen($parts[$index]);
                $gapLength = (int) self::strlen($parts[$index + 1]);
                $position += $wordLength + $gapLength;
            }

            return $wordsByOffset;
        }

        // any other format: only the number of words
        return (int) (($partCount - 1) / 2);
    }
8126
8127
    /**
8128
     * Case-insensitive string comparison.
8129
     *
8130
     * INFO: Case-insensitive version of UTF8::strcmp()
8131
     *
8132
     * @param string $str1     <p>The first string.</p>
8133
     * @param string $str2     <p>The second string.</p>
8134
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8135
     *
8136
     * @return int
8137
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8138
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
8139
     *             <strong>0</strong> if they are equal
8140
     */
8141 23
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical options, then compare case-sensitively.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8148
8149
    /**
8150
     * alias for "UTF8::strstr()"
8151
     *
8152
     * @see UTF8::strstr()
8153
     *
8154
     * @param string $haystack
8155
     * @param string $needle
8156
     * @param bool   $before_needle
8157
     * @param string $encoding
8158
     * @param bool   $cleanUtf8
8159
     *
8160
     * @return false|string
8161
     */
8162 2
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // Pure alias: every argument is forwarded unchanged to UTF8::strstr().
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8171
8172
    /**
8173
     * Case-sensitive string comparison.
8174
     *
8175
     * @param string $str1 <p>The first string.</p>
8176
     * @param string $str2 <p>The second string.</p>
8177
     *
8178
     * @return int
8179
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8180
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8181
     *             <strong>0</strong> if they are equal
8182
     */
8183 29
    public static function strcmp(string $str1, string $str2): int
    {
        // Byte-identical strings are equal; no normalization needed.
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the NFD (canonical decomposition) forms, so that canonically
        // equivalent sequences compare as equal.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false on failure (e.g. invalid UTF-8).
        // Under strict_types that false would make \strcmp() throw a TypeError,
        // so fall back to the raw input for the operand that failed.
        if ($normalized1 === false) {
            $normalized1 = $str1;
        }
        if ($normalized2 === false) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
8190
8191
    /**
8192
     * Find length of initial segment not matching mask.
8193
     *
8194
     * @param string $str
8195
     * @param string $charList
8196
     * @param int    $offset
8197
     * @param int    $length
8198
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8199
     *
8200
     * @return int
8201
     */
8202 12
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Empty mask: no character can match, so the whole string is the segment.
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        // Restrict the search to the requested window, if one was given.
        $hasWindow = $offset !== null || $length !== null;
        if ($hasWindow) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                $window = self::substr($str, $start, $length, $encoding);
            } elseif ($length === null) {
                $window = \mb_substr($str, $start);
            } else {
                $window = \mb_substr($str, $start, $length);
            }

            if ($window === false) {
                return 0;
            }

            $str = $window;
        }

        if ($str === '') {
            return 0;
        }

        // Lazily capture everything in front of the first character from the mask.
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // no mask character present: the whole (windowed) string counts
            return (int) self::strlen($str, $encoding);
        }

        $prefixLength = self::strlen($found[1], $encoding);

        return $prefixLength === false ? 0 : $prefixLength;
    }
8252
8253
    /**
8254
     * alias for "UTF8::stristr()"
8255
     *
8256
     * @see UTF8::stristr()
8257
     *
8258
     * @param string $haystack
8259
     * @param string $needle
8260
     * @param bool   $before_needle
8261
     * @param string $encoding
8262
     * @param bool   $cleanUtf8
8263
     *
8264
     * @return false|string
8265
     */
8266 1
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // Pure alias: every argument is forwarded unchanged to UTF8::stristr().
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8275
8276
    /**
8277
     * Create a UTF-8 string from code points.
8278
     *
8279
     * INFO: opposite to UTF8::codepoints()
8280
     *
8281
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
8282
     *
8283
     * @return string UTF-8 encoded string
8284
     */
8285 4
    public static function string(array $array): string
    {
        // Map every code point through UTF8::chr() and glue the results together.
        // The callable form is kept on purpose so the callback is invoked by array_map().
        $toChar = [self::class, 'chr'];
        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
8298
8299
    /**
8300
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
8301
     *
8302
     * @param string $str <p>The input string.</p>
8303
     *
8304
     * @return bool
8305
     *              <strong>true</strong> if the string has BOM at the start,<br>
8306
     *              <strong>false</strong> otherwise
8307
     */
8308 6
    public static function string_has_bom(string $str): bool
    {
        // Only the keys of the BOM table (the BOM byte sequences) are needed.
        // Iterate them by value: a by-reference foreach over the static table
        // binds its elements to a reference for no benefit and leaves an
        // unused loop variable behind.
        foreach (\array_keys(self::$BOM) as $bomString) {
            // position 0 means the string starts with this BOM
            if (\strpos($str, $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8319
8320
    /**
8321
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
8322
     *
8323
     * @see http://php.net/manual/en/function.strip-tags.php
8324
     *
8325
     * @param string $str            <p>
8326
     *                               The input string.
8327
     *                               </p>
8328
     * @param string $allowable_tags [optional] <p>
8329
     *                               You can use the optional second parameter to specify tags which should
8330
     *                               not be stripped.
8331
     *                               </p>
8332
     *                               <p>
8333
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
8334
     *                               can not be changed with allowable_tags.
8335
     *                               </p>
8336
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
8337
     *
8338
     * @return string the stripped string
8339
     */
8340 4
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Only pass the second argument to the native function when the caller supplied one.
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
8356
8357
    /**
8358
     * Strip all whitespace characters. This includes tabs and newline
8359
     * characters, as well as multibyte whitespace such as the thin space
8360
     * and ideographic space.
8361
     *
8362
     * @param string $str
8363
     *
8364
     * @return string
8365
     */
8366 36
    public static function strip_whitespace(string $str): string
    {
        if ($str !== '') {
            // Remove every run of whitespace; the "u" modifier makes the match UTF-8 aware.
            $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

            // preg_replace() yields null on error; the cast turns that into ''.
            return (string) $stripped;
        }

        return '';
    }
8374
8375
    /**
8376
     * Finds position of first occurrence of a string within another, case insensitive.
8377
     *
8378
     * @see http://php.net/manual/en/function.mb-stripos.php
8379
     *
8380
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
8381
     * @param string $needle    <p>The string to find in haystack.</p>
8382
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
8383
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8384
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8385
     *
8386
     * @return false|int
8387
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
8388
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
8389
     */
8390 24
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $intlUsable = $encoding === 'UTF-8' // "grapheme_stripos()" can't handle other encodings
            && $offset >= 0 // grapheme_stripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $graphemePosition = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePosition !== false) {
                return $graphemePosition;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8450
8451
    /**
8452
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
8453
     *
8454
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
8455
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
8456
     * @param bool   $before_needle [optional] <p>
8457
     *                              If <b>TRUE</b>, it returns the part of the
8458
     *                              haystack before the first occurrence of the needle (excluding the needle).
8459
     *                              </p>
8460
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
8461
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
8462
     *
8463
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
8464
     */
8465 12
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if cleaning removed all of it.
        // Compare against '' explicitly: a loose "!$needle" check would also
        // treat the valid needle "0" as empty and return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // None of the fallbacks below is given the encoding, so warn the caller.
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the
        // first case-insensitive occurrence of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // everything from the needle onwards: skip as many characters as the captured prefix has
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8534
8535
    /**
8536
     * Get the string length, not the byte-length!
8537
     *
8538
     * @see     http://php.net/manual/en/function.mb-strlen.php
8539
     *
8540
     * @param string $str       <p>The string being checked for length.</p>
8541
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8542
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8543
     *
8544
     * @return false|int
8545
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
8546
     *                   $encoding.
8547
     *                   (One multi-byte character counted as +1).
8548
     *                   <br>
8549
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
8550
     *                   chars.
8551
     */
8552 173
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        // $encoding is final from here on
        $isUtf8 = $encoding === 'UTF-8';

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8 ? \mb_strlen($str) : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        // Neither mbstring nor iconv available: other encodings cannot be handled.
        if (
            !$isUtf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any single character"
        //

        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        // no match at all for a non-empty string is reported as failure
        return $charCount === 0 ? false : $charCount;
    }
8649
8650
    /**
8651
     * Get string length in byte.
8652
     *
8653
     * @param string $str
8654
     *
8655
     * @return int
8656
     */
8657
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an
        // 8-bit encoding; otherwise the native strlen() is called.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
8670
8671
    /**
8672
     * Case insensitive string comparisons using a "natural order" algorithm.
8673
     *
8674
     * INFO: natural order version of UTF8::strcasecmp()
8675
     *
8676
     * @param string $str1     <p>The first string.</p>
8677
     * @param string $str2     <p>The second string.</p>
8678
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8679
     *
8680
     * @return int
8681
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8682
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8683
     *             <strong>0</strong> if they are equal
8684
     */
8685 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical options, then compare in natural order.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8692
8693
    /**
8694
     * String comparisons using a "natural order" algorithm
8695
     *
8696
     * INFO: natural order version of UTF8::strcmp()
8697
     *
8698
     * @see  http://php.net/manual/en/function.strnatcmp.php
8699
     *
8700
     * @param string $str1 <p>The first string.</p>
8701
     * @param string $str2 <p>The second string.</p>
8702
     *
8703
     * @return int
8704
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8705
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
8706
     *             <strong>0</strong> if they are equal
8707
     */
8708 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Identical strings need no folding at all.
        if ($str1 === $str2) {
            return 0;
        }

        // Fold both operands via UTF8::strtonatfold() before the native natural-order comparison.
        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
8712
8713
    /**
8714
     * Case-insensitive string comparison of the first n characters.
8715
     *
8716
     * @see  http://php.net/manual/en/function.strncasecmp.php
8717
     *
8718
     * @param string $str1     <p>The first string.</p>
8719
     * @param string $str2     <p>The second string.</p>
8720
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
8721
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8722
     *
8723
     * @return int
8724
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
8725
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
8726
     *             <strong>0</strong> if they are equal
8727
     */
8728 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical options, then compare the first $len characters.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8740
8741
    /**
8742
     * String comparison of the first n characters.
8743
     *
8744
     * @see  http://php.net/manual/en/function.strncmp.php
8745
     *
8746
     * @param string $str1     <p>The first string.</p>
8747
     * @param string $str2     <p>The second string.</p>
8748
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
8749
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8750
     *
8751
     * @return int
8752
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
8753
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
8754
     *             <strong>0</strong> if they are equal
8755
     */
8756 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both operands down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare what is left.
        return self::strcmp($head1, $head2);
    }
8776
8777
    /**
8778
     * Search a string for any of a set of characters.
8779
     *
8780
     * @see  http://php.net/manual/en/function.strpbrk.php
8781
     *
8782
     * @param string $haystack  <p>The string where char_list is looked for.</p>
8783
     * @param string $char_list <p>This parameter is case sensitive.</p>
8784
     *
8785
     * @return false|string string starting from the character found, or false if it is not found
8786
     */
8787 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // Find the first character of the haystack that is part of the list.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // Locate that character in the haystack and return everything from there on.
        $start = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $start);
    }
8799
8800
    /**
8801
     * Find position of first occurrence of string in a string.
8802
     *
8803
     * @see http://php.net/manual/en/function.mb-strpos.php
8804
     *
8805
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
8806
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
8807
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
8808
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8809
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8810
     *
8811
     * @return false|int
8812
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
8813
     *                   string.<br> If needle is not found it returns false.
8814
     */
8815 53
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle:
        // convert a code point into its character first
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        // Neither mbstring nor iconv available: other encodings cannot be handled.
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Turn it into
            // an absolute character index before slicing, so that the position
            // returned below is relative to the start of the haystack and not to
            // the start of the tail slice.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        // search only in the part of the haystack that starts at $offset
        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystackTail = (string) $haystackTmp;

        // byte position of the needle inside the tail
        $pos = \strpos($haystackTail, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character count and re-add the skipped characters
            return $offset + (int) self::strlen(\substr($haystackTail, 0, $pos), $encoding);
        }

        // the needle sits right at the start of the tail
        return $offset;
    }
8947
8948
    /**
     * Find the position of the first occurrence of a string in a string, working on raw bytes.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or if haystack / needle
     *                   is empty), it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing can be found in / with an empty string
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // If the mbstring function overload is active, call the "mb_" variant
        // explicitly with an 8-bit charset; otherwise use the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
8977
8978
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * INFO: without "mbstring" only the first character of the needle is used for the search
     *       (except for the native "strrchr()" shortcut, which receives the needle unchanged).
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isSingleByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
        if ($isSingleByteEncoding && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both fallbacks search for the first character of the needle only
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9100
9101
    /**
     * Reverses characters order in the string.
     *
     * The string is walked character by character from the last position down to the first one;
     * a character that can not be extracted (false) is skipped.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $result = '';

        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_" functions directly
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }

            return $result;
        }

        // other encodings: normalize the charset name and use the class helpers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
            $char = self::substr($str, $index, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
9140
9141
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * INFO: without "mbstring" only the first character of the needle is used for the search.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the fallback searches for the first character of the needle only
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9216
9217
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intlIsUsable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
                        &&
                        $offset >= 0 // grapheme_strripos() can't handle negative offset
                        &&
                        self::$SUPPORT['intl'] === true;
        if ($intlIsUsable) {
            $intlPos = \grapheme_strripos($haystack, $needle, $offset);
            // a miss (false) is not returned here, the next fallbacks are tried instead
            if ($intlPos !== false) {
                return $intlPos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, then run the case-sensitive search on the folded strings
        $haystack = self::strtocasefold($haystack, true, false, $encoding);
        $needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($haystack, $needle, $offset, $encoding, $cleanUtf8);
    }
9327
9328
    /**
     * Finds position of last occurrence of a string within another, case insensitive,
     * working on raw bytes.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found
     *                   (or if haystack / needle is empty)
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing can be found in / with an empty string
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // If the mbstring function overload is active, call the "mb_" variant
        // explicitly with an 8-bit charset; otherwise use the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
9359
9360
    /**
     * Find position of last occurrence of a string in a string.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            // a miss (false) is not returned here, the next fallbacks are tried instead
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the part that should be searched. The requested
        // $encoding is passed to the helpers, so that the cut (and the character
        // count below) is not silently done with the default charset.
        $haystackTmp = null;
        if ($offset > 0) {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            // the cut haystack still starts at position 0 of the original string
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position of the last occurrence ...
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character position by counting the characters before it
        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9497
9498
    /**
     * Find position of last occurrence of a string in a string, working on raw bytes.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or if haystack / needle
     *                   is empty), it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing can be found in / with an empty string
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // If the mbstring function overload is active, call the "mb_" variant
        // explicitly with an 8-bit charset; otherwise use the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strrpos($haystack, $needle, $offset);
    }
9528
9529
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start position of the part of the string that is examined.</p>
     * @param int    $length   [optional] <p>Length of the part of the string that is examined.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return int
     *             The length of the matching initial segment,<br>or 0 if the (cut) string or the mask is empty
     *             or nothing matches
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested part, if a offset and / or a length was given
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // match the leading run of mask-characters and count its characters
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9572
9573
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $intlResult = \grapheme_strstr($haystack, $needle, $before_needle);
            // a miss (false) is not returned here, the next fallbacks are tried instead
            if ($intlResult !== false) {
                return $intlResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $beforeNeedle = $match[1];

        return $before_needle
            ? $beforeNeedle
            : self::substr($haystack, (int) self::strlen($beforeNeedle));
    }
9681
9682
    /**
     * Finds first occurrence of a string within another, working on raw bytes.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end,
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found
     *                      (or if haystack / needle is empty)
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing can be found in / with an empty string
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // If the mbstring function overload is active, call the "mb_" variant
        // explicitly with an 8-bit charset; otherwise use the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850') // 8-BIT
            : \strstr($haystack, $needle, $before_needle);
    }
9717
9718
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid characters, before the case of the string is changed
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // fast path: no language specific handling and UTF-8, so "mb_" can be used directly
        if ($lang === null && $encoding === 'UTF-8') {
            return $lower === true ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        return $lower === true
            ? self::strtolower($str, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($str, $encoding, $cleanUtf8, $lang);
    }
9770
9771
    /**
     * Convert a string to lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the parameter is untyped, so normalize it first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before the case mapping
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: no language given and default encoding
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // language specific rules are only available via the intl transliterator
        if ($lang !== null && self::$SUPPORT['intl'] === true) {
            $transliteratorId = $lang . '-Lower';
            if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                // unknown language: use the generic transliterator instead
                $transliteratorId = 'Any-Lower';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            return (string) \transliterator_transliterate($transliteratorId, $str);
        }

        if ($lang !== null) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
9836
9837
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before the case mapping
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: no language given and default encoding
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, but it has no transliterator for this language
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
9902
9903
    /**
     * Translate characters or replace sub-strings.
     *
     * Behaviour depends on the arguments:
     * - if $to is not empty, $from and $to are split via UTF8::str_split(), cut to the
     *   length of the shorter one and used as a "from char => to char" map
     * - if $to is empty and $from is a string, every occurrence of $from is removed
     * - otherwise $from is handed to the native strtr() as replace-pairs
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the from/to characters cannot be combined into a map
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // replacing something by itself changes nothing
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // only translate as many characters as both sides provide
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the inputs in their own variables, so that the error message below
            // can still show them (the result would otherwise overwrite $from)
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            $from = $pairs;
        }

        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
9950
9951
    /**
     * Calculate the display width of a string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     *             <p>The width; 0 for an empty string.</p>
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // Remove every character of the listed ranges and let preg_replace() count them;
        // each removed character contributes a width of 2, every remaining one a width of 1.
        $wideCharCount = 0;
        $narrowPart = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCharCount);

        $narrowWidth = (int) self::strlen($narrowPart, 'UTF-8');

        return ($wideCharCount << 1) + $narrowWidth;
    }
10001
10002
    /**
     * Get part of a string.
     *
     * The implementation tries, in this order: mbstring, a plain byte based substr() for
     * "CP850" / "ASCII", intl ("grapheme_substr"), iconv, a byte based substr() for
     * ASCII-only input and finally a pure PHP fallback based on UTF8::str_split().
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str       <p>The string being checked.</p>
     * @param int    $offset    <p>The first position used in str.</p>
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // Delegate to mbstring with the requested encoding. Calling self::substr()
            // here with the very same arguments would end up in this branch again and
            // recurse without end.
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string
        // ($length === 0 was already handled at the top, so a strict null check is
        // sufficient here and avoids the ambiguous loose comparison)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        } else {
            $length = (int) $length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10162
10163
    /**
     * Compare two strings, starting at an offset of the first one and limited to a number of characters.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // Only cut the strings if a window was requested at all.
        $hasWindow = !($offset === 0 && $length === null);

        if ($hasWindow && $encoding === 'UTF-8') {
            $str1 = $length === null
                ? (string) \mb_substr($str1, $offset)
                : (string) \mb_substr($str1, $offset, $length);

            // the second string is cut to the length of the extracted part
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } elseif ($hasWindow) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
10216
10217
    /**
     * Count how often a substring occurs in a string.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack  <p>The string to search in.</p>
     * @param string $needle    <p>The substring to search for.</p>
     * @param int    $offset    [optional] <p>The offset where to start counting.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum length after the specified offset to search for the
     *                          substring. It outputs a warning if the offset plus the length is
     *                          greater than the haystack length.
     *                          </p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if the haystack or the needle is empty
     *                   or if the length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences from both strings before counting
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback: count the matches of the quoted needle via a unicode regex
        $pattern = '/' . \preg_quote($needle, '/') . '/us';
        \preg_match_all($pattern, $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10298
10299
    /**
     * Count how often a substring occurs in a string, processed in bytes.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string being found.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The offset where to start counting
     *                         </p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring. It outputs a warning if the offset plus the length is
     *                         greater than the haystack length.
     *                         </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // Without the overload the native function handles offset and length on its own.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // With the overload the window is cut out first, afterwards "mb_" is used.
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset.
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
10376
10377
    /**
     * Count the occurrences of $substring in the given string.
     * The comparison is case-sensitive by default; pass false as $caseSensitive
     * to ignore the case.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>The number of occurrences; 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$caseSensitive) {
                // compare the uppercase variants of both strings
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            // fold both strings (to uppercase) before counting
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10423
10424
    /**
     * Remove a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to cut from, or nothing to cut off
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10448
10449
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string $str    <p>The string being checked.</p>
     * @param int    $offset <p>The first position used in str.</p>
     * @param int    $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($length === 0 || $str === '') {
            return '';
        }

        // whole string
        if ($length === null && $offset === 0) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // no length given: use a very large length instead
            $byteLength = $length ?? 2147483647;

            return \substr($str, $offset, $byteLength);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset.
        return \mb_substr($str, $offset, $length, 'CP850');
    }
10480
10481
    /**
     * Remove a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to cut from, or nothing to cut off
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $haystackLength = (int) self::strlen($haystack);
        $suffixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystackLength - $suffixLength);
    }
10505
10506
    /**
     * Remove a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to cut from, or nothing to cut off
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10530
10531
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * Array mode: when $str is an array, every element is processed on its own
     * and an array is returned. Scalar $replacement / $offset / $length values
     * are applied to every element; array values are matched up by position.
     * In array mode a $length of null is turned into 0 for every element (the
     * replacement is inserted, nothing is removed), non-integer offsets become 0
     * and non-integer lengths become the number of elements in $str.
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given (and $str is a string), the replacing
     *                                     ends at the end of string. If length is zero then this function will have
     *                                     the effect of inserting replacement into string at the given start
     *                                     offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // array_pad() never shrinks, so the padded scalars below would still hold
            // one element and array_map() would invent a result for an empty input
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = \is_int($valueTmp) ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = \is_int($valueTmpV2) ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, element by element; the closure forwards $encoding,
            // which a plain callable passed to array_map() would silently drop
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only its first element is used
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing / oversized one means "to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into arrays of characters and splice them
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10681
10682
    /**
     * Strips a suffix ($needle) from the end of the string ($haystack).
     *
     * The haystack is returned unchanged when it is empty, when the needle is
     * empty, or when its trailing bytes are not identical to the needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the haystack without the trailing needle
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise comparison of the haystack's tail against the needle
        $tail = \substr($haystack, -\strlen($needle));
        if ($tail !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
10723
10724
    /**
     * Returns a case swapped version of the string.
     *
     * Fast path: the lower-cased, the upper-cased and the original string are
     * combined with a byte-wise XOR, which yields the "other" case for every byte.
     * The XOR result is only returned when all three strings have the same byte
     * length; otherwise (a case mapping changed the byte length, e.g. "ı" -> "I")
     * the bytes would be misaligned / truncated, so the string is swapped
     * character by character instead.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // run the input through "UTF8::clean()" before any case conversion
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // byte-wise XOR: lower ^ upper ^ original === the other case, as long as bytes line up
        $swapped = (string) ($lower ^ $upper ^ $str);

        $byteLength = \strlen($str);
        if (\strlen($lower) === $byteLength && \strlen($upper) === $byteLength) {
            return $swapped;
        }

        //
        // fallback: a case mapping changed the byte length, swap each character on its own
        //

        if ($isUtf8) {
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars === false) {
                // the string could not be split (e.g. invalid UTF-8): keep the XOR result
                return $swapped;
            }
        } else {
            $chars = [];
            $charCount = (int) self::strlen($str, $encoding);
            for ($i = 0; $i < $charCount; ++$i) {
                $chars[] = (string) self::substr($str, $i, 1, $encoding);
            }
        }

        $result = '';
        foreach ($chars as $char) {
            $charUpper = $isUtf8 ? \mb_strtoupper($char) : self::strtoupper($char, $encoding);

            if ($charUpper !== $char) {
                // not upper-case yet -> upper-case it
                $result .= $charUpper;

                continue;
            }

            // already upper-case (or caseless) -> lower-case it
            $result .= $isUtf8 ? \mb_strtolower($char) : self::strtolower($char, $encoding);
        }

        return $result;
    }
10751
10752
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when an extension ("mbstring" or "iconv") is not
     * loaded although one of its functions ("mb_strlen" / "iconv") exists.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringPolyfilled || $iconvPolyfilled;
    }
10775
10776
    /**
     * Replaces every tab character in the string with $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength <p>Number of spaces that replace one tab.</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
10794
10795
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without $tryToKeepStringLength the work is done by
     * "mb_convert_case()"; otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // run the input through "UTF8::clean()" before any case conversion
            $str = self::clean($str);
        }

        // language specific rules / length preservation need the custom implementation
        $needsCustomCasing = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsCustomCasing) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
10832
10833
    /**
     * Deprecated alias of "UTF8::to_ascii()"; all arguments are forwarded unchanged.
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr <p>Passed on as second argument of "UTF8::to_ascii()".</p>
     * @param bool   $strict    <p>Passed on as third argument of "UTF8::to_ascii()".</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $asciiString = self::to_ascii($str, $subst_chr, $strict);

        return $asciiString;
    }
10850
10851
    /**
     * Deprecated alias of "UTF8::to_iso8859()"; the argument is forwarded unchanged.
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
10866
10867
    /**
     * Deprecated alias of "UTF8::to_latin1()"; the argument is forwarded unchanged.
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
10882
10883
    /**
     * Deprecated alias of "UTF8::to_utf8()"; the argument is forwarded unchanged.
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
10898
10899
    /**
     * Convert a string into ASCII.
     *
     * Steps: return early for pure ASCII input, clean the string, optionally
     * transliterate it via PHP-Intl ($strict), then decode every remaining
     * multi-byte character into its code point and look up a replacement in
     * per-"bank" tables (bank = code point >> 8) that are loaded on demand and
     * cached in a static variable. Characters without a replacement are
     * substituted by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // cache of the transliteration tables, indexed by bank
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        // lazily load the lookup used below to turn a single byte into its ordinal value
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split into characters; fall back to an empty list if the match failed
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0] ?? [];

        foreach ($chars as &$c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // reset per character, so a character that yields no code point
            // can never re-use the code point of the previous character
            $ord = null;

            $ordC1 = self::$ORD[$c[1]];

            // 2-byte sequence
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3-byte sequence
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4-byte sequence
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        // 5-byte sequence
                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            // 6-byte sequence
                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // no code point could be decoded (this includes the lead bytes 254 / 255)
            if ($ordC0 === 254 || $ordC0 === 255 || $ord === null) {
                $c = $unknown;

                continue;
            }

            // the high bits select the table, the low 8 bits the entry inside of it
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember the missing table as empty
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            // debugging hint: print sprintf('x%02x', $bank), $c, $ord and $newchar
            // here to find out which table / entry is (not) used for a character

            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {
                $c = $unknown;
            }
        }
        // break the reference held by the loop variable
        unset($c);

        return \implode('', $chars);
    }
11059
11060
    /**
     * Interprets a value as boolean.
     *
     * The value is cast to string first. Rules, in this order:
     * - the empty string is false
     * - "true", "1", "on", "yes" are true and "false", "0", "off", "no" are false
     *   (matched case-insensitively)
     * - numeric strings are true if they are greater than zero
     * - anything else is true unless it is "" or "0" after trimming
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $knownValues = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // all keys are lower-case, so one lookup on the lower-cased value
        // covers both the exact and the case-insensitive match
        $mapped = $knownValues[\strtolower($value)] ?? null;
        if ($mapped !== null) {
            return $mapped;
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
11101
11102
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Every character except ASCII letters, digits, ".", "-", whitespace and the
     * characters of $fallback_char is removed; runs of whitespace and runs of
     * $fallback_char are collapsed into a single $fallback_char, which is also
     * trimmed from both ends of the result.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration is done by default; set to true to run the string
     *                                  through "UTF8::str_transliterate()" first.</p>
     * @param string $fallback_char     <p>Replacement for whitespace; an empty string simply removes
     *                                  whitespace.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                            // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // an empty $fallback_char would produce the invalid pattern "/[]+/", which
        // makes preg_replace() fail and the whole result collapse to an empty string
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        if ($fallback_char === '') {
            return $string;
        }

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11137
11138
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept);
     * everything else is cast to string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
11162
11163
    /**
     * Alias of "UTF8::to_iso8859()"; the argument is forwarded unchanged.
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11176
11177
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX", including surrogate pairs such as "\ud83d\ude00")
     * and, if requested, html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays are converted element by element (recursively, keys are kept)
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $max = \strlen($str);
        $buf = '';

        for ($i = 0; $i < $max; ++$i) {
            $c1 = $str[$i];

            if ($c1 < "\xC0") {
                if (($c1 & "\xC0") === "\x80") {
                    // a continuation byte (10xxxxxx) without a lead byte: needs conversion
                    $buf .= self::to_utf8_convert_helper($c1);
                } else {
                    // it doesn't need conversion
                    $buf .= $c1;
                }

                continue;
            }

            // should be converted to UTF8, if it's not UTF8 already:
            // the lead byte tells how long a UTF-8 sequence starting here would be
            if ($c1 <= "\xDF") {
                $sequenceLength = 2;
            } elseif ($c1 <= "\xEF") {
                $sequenceLength = 3;
            } elseif ($c1 <= "\xF7") {
                $sequenceLength = 4;
            } else {
                // doesn't look like UTF8, but should be converted
                $sequenceLength = 0;
            }

            // every following byte of the sequence must exist and be a continuation byte
            $looksLikeUtf8 = $sequenceLength > 0 && $i + $sequenceLength <= $max;
            for ($j = 1; $looksLikeUtf8 && $j < $sequenceLength; ++$j) {
                $cN = $str[$i + $j];
                $looksLikeUtf8 = $cN >= "\x80" && $cN <= "\xBF";
            }

            if ($looksLikeUtf8) {
                // yeah, almost sure it's UTF8 already
                $buf .= \substr($str, $i, $sequenceLength);
                $i += $sequenceLength - 1;
            } else {
                // not valid UTF8 - convert it
                $buf .= self::to_utf8_convert_helper($c1);
            }
        }

        // decode unicode escape sequences; a high + low surrogate pair is matched
        // first and decoded together, every other escape is decoded on its own
        $buf = \preg_replace_callback(
            '/\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})|\\\\u([0-9a-f]{4})/i',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match): string {
                if (isset($match[3])) {
                    // single escape
                    return \mb_convert_encoding(\pack('H*', $match[3]), 'UTF-8', 'UCS-2BE');
                }

                // surrogate pair
                return \mb_convert_encoding(\pack('H*', $match[1] . $match[2]), 'UTF-8', 'UTF-16BE');
            },
            $buf
        );

        if ($buf === null) {
            return '';
        }

        // decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
11284
11285
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; whitespace is stripped if
     *                           null or an empty string is given</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // compare explicitly: a plain truthiness check would treat the valid
        // character list "0" as "not given" and strip whitespace instead
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\s]+|[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11318
11319
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the rest; only the first
     * character is upper-cased. Without a language and without
     * $tryToKeepStringLength "mb_strtoupper()" is used, otherwise
     * "UTF8::strtoupper()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // run the input through "UTF8::clean()" before any case conversion
            $str = self::clean($str);
        }

        // the plain mb-function is enough, if no special casing rules were requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $firstChar = (string) \mb_substr($str, 0, 1);
            $rest = (string) \mb_substr($str, 1);

            if ($useMbFunction) {
                return \mb_strtoupper($firstChar) . $rest;
            }

            return self::strtoupper($firstChar, $encoding, false, $lang, $tryToKeepStringLength) . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($useMbFunction) {
            $firstChar = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($firstChar, $encoding) . $rest;
        }

        $firstChar = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper($firstChar, $encoding, false, $lang, $tryToKeepStringLength) . $rest;
    }
11388
11389
    /**
     * Alias of "UTF8::ucfirst()": forwards the three arguments unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
     * @param bool   $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $upperCasedFirst = self::ucfirst($str, $encoding, $cleanUtf8);

        return $upperCasedFirst;
    }
11404
11405
    /**
     * Uppercase the first character of every word in the string.
     *
     * Unlike "mb_convert_case($str, MB_CASE_TITLE)" the remaining characters of
     * a word are not lower-cased.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are left untouched (exact, case-sensitive match).</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // strict comparison: "!$str" would also swallow the valid input "0"
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the native function is only safe if no char-list and no exceptions
        // were given; compared against '' so that e.g. a char-list of "0" counts
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // iterate by key instead of by reference, so no dangling reference is left behind
        foreach ($words as $index => $word) {
            if (!$word) {
                // empty pieces stay as they are
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::ucfirst($word, $encoding);
            }
        }

        return \implode('', $words);
    }
11466
11467
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: decode repeatedly until the string no longer changes;
     *                             false: decode exactly once.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // non-standard "%uXXXX" escapes are turned into html entities,
        // which are then resolved by the decode pass below
        $pattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always run at least one decode pass. Previously nothing at all was
        // decoded when $multi_decode was false.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
11519
11520
    /**
     * Returns a map of "urlencoded"-win1252 bytes ("%20" - "%FF") to their UTF-8 characters.
     *
     * NOTE(review): the entries for %7F, %81, %8D, %8F, %90 and %9D are empty
     * strings here. 0x81, 0x8D, 0x8F, 0x90 and 0x9D are unassigned in
     * Windows-1252; confirm against the repository copy that no raw control
     * bytes were intended for these keys.
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[]
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => '',
            // 0x80 is the euro sign in Windows-1252 (was a duplicate of the %60 backtick)
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            // no-break space (U+00A0), written as an escape because it is invisible
            '%A0' => "\xC2\xA0",
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            // soft hyphen (U+00AD), written as an escape because it is invisible
            '%AD' => "\xC2\xAD",
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
11756
11757
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences with a code point below 256 are converted via the
     * "chr" lookup table; every other multi-byte sequence becomes "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>If true, the string (after the win1252 table replacement) is returned
     *                              unconverted whenever the converted result is not shorter than it, as
     *                              measured by "UTF8::strlen()".</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $SEARCH_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $REPLACE_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        $str = \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);

        // kept for the "$keepUtf8Chars" comparison at the end
        $beforeConversion = $str;
        $byteCount = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $placeholder = '?';

        // the string is rewritten in place: $read walks the input bytes,
        // $write (never ahead of $read) marks the next output byte
        $read = 0;
        $write = 0;
        while ($read < $byteCount) {
            $leadBits = $str[$read] & "\xF0";

            if ($leadBits === "\xC0" || $leadBits === "\xD0") {
                // two-byte sequence
                $codePoint = (self::$ORD[$str[$read] & "\x1F"] << 6) | self::$ORD[$str[++$read] & "\x3F"];
                $str[$write] = $codePoint < 256 ? self::$CHR[$codePoint] : $placeholder;
            } elseif ($leadBits === "\xE0" || $leadBits === "\xF0") {
                // three-byte (E0) or four-byte (F0) sequence: skip the
                // continuation bytes and emit the placeholder
                $read += $leadBits === "\xF0" ? 3 : 2;
                $str[$write] = $placeholder;
            } else {
                // single byte, copied as it is
                $str[$write] = $str[$read];
            }

            ++$read;
            ++$write;
        }

        $decoded = \substr($str, 0, $write);
        if ($decoded === false) {
            $decoded = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($decoded) >= (int) self::strlen($beforeConversion)
        ) {
            return $beforeConversion;
        }

        return $decoded;
    }
11840
11841
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * After the native conversion, the "win1252_to_utf8" table is applied, but
     * only if the converted string contains a "\xC2" byte.
     *
     * NOTE(review): "\utf8_encode()" is deprecated as of PHP 8.2.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $str = \utf8_encode($str);

        // a polyfill implementation may return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($str === false) {
            return '';
        }

        $needsTableFix = \strpos($str, "\xC2") !== false;
        if (!$needsTableFix) {
            return $str;
        }

        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $SEARCH_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $REPLACE_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
11881
11882
    /**
     * Forwards to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
11895
11896
    /**
     * Returns the class-level whitespace table.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E. [email protected]
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
11911
11912
    /**
     * Cuts a string after a maximum number of words.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
     * @param string $strAddOn <p>Appended to the result if the string was shortened.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace plus up to $limit runs of "non-whitespace + whitespace"
        $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        $wasShortened = isset($found[0])
                        &&
                        \mb_strlen($str) !== (int) \mb_strlen($found[0]);

        if ($wasShortened) {
            return \rtrim($found[0]) . $strAddOn;
        }

        return $str;
    }
11939
11940
    /**
     * Wraps a string to a given number of characters.
     *
     * The string is turned into a one-byte-per-character mask ("?" for a
     * character, " " for a space, "#" for an existing break), the mask is
     * wrapped with the native "wordwrap()" and the break positions found in
     * the mask are then applied to the list of real characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string
     *                if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        $mask = '';
        $chars = [];

        foreach ($segments as $index => $segment) {
            if ($index) {
                // an already existing break is kept as one list element
                $chars[] = $break;
                $mask .= '#';
            }

            unset($segments[$index]);

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $charIndex = 0;
        $maskIndex = -1;
        $breakPos = -1;

        while (true) {
            $breakPos = \mb_strpos($mask, '#', $breakPos + 1);
            if ($breakPos === false) {
                break;
            }

            // copy everything in front of the break
            for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex++]);
            }

            // the space / original break at the wrap position is dropped
            if ($break === $chars[$charIndex] || $chars[$charIndex] === ' ') {
                unset($chars[$charIndex++]);
            }

            $wrapped .= $break;
        }

        return $wrapped . \implode('', $chars);
    }
12009
12010
    /**
     * Word-wraps every line of the string on its own.
     *
     * The input is split at "\r\n", "\r" or "\n"; each line is wrapped at
     * $limit characters and terminated with "\n", so the result always ends
     * with a line break.
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width for each line.</p>
     *
     * @return string
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            // "self::wordwrap()" counts characters; the native "\wordwrap()"
            // counts bytes and wraps multi-byte text too early
            $wrapped .= self::wordwrap($line, $limit) . "\n";
        }

        return $wrapped;
    }
12034
12035
    /**
     * Returns the class-level list of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12044
12045
    /**
     * Replaces case-folding pairs in the string via lookup tables.
     *
     * @param string $str
     * @param bool   $useLower     <p>true: replace upper-case entries by lower-case ones;
     *                             otherwise lower-case entries by upper-case ones.</p>
     * @param bool   $fullCaseFold <p>Additionally apply the "caseFolding_full" table.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        static $FULL_CASE_FOLD = null;

        $toLower = $useLower === true;

        // common pairs first
        if ($toLower) {
            $search = self::$COMMON_CASE_FOLD['upper'];
            $replace = self::$COMMON_CASE_FOLD['lower'];
        } else {
            $search = self::$COMMON_CASE_FOLD['lower'];
            $replace = self::$COMMON_CASE_FOLD['upper'];
        }
        $str = \str_replace($search, $replace, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // the full table is loaded once, on first use
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        if ($toLower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12086
12087
    /**
     * Includes "/data/<file>.php" (relative to this file) and returns its return value.
     *
     * @param string $file <p>The file name without directory and without ".php".</p>
     *
     * @return mixed
     */
    private static function getData(string $file)
    {
        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        $data = include __DIR__ . '/data/' . $file . '.php';

        return $data;
    }
12101
12102
    /**
     * Includes "/data/<file>.php" (relative to this file) if that file exists.
     *
     * @param string $file <p>The file name without directory and without ".php".</p>
     *
     * @return false|mixed the return value of the included file, false if the file does not exist
     */
    private static function getDataIfExists(string $file)
    {
        $file = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($file)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $file;
    }
12120
12121
    /**
     * Checks whether the mbstring function overloading of string functions is active.
     *
     * INFO: the INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        // without the constant there is nothing to test against
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($overloadFlags & \MB_OVERLOAD_STRING);
    }
12138
12139
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param array    $strings           <p>The strings to filter.</p>
     * @param bool     $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
     * @param int|null $removeShortValues <p>Drop values whose "mb_strlen()" is lower than or equal to this
     *                                    number; null disables the check.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = \array_filter(
            $strings,
            static function ($value) use ($removeEmptyValues, $removeShortValues): bool {
                if ($removeShortValues !== null && \mb_strlen($value) <= $removeShortValues) {
                    return false;
                }

                return !($removeEmptyValues === true && \trim($value) === '');
            }
        );

        // "array_filter()" preserves keys, the result is a plain list
        return \array_values($kept);
    }
12173
12174
    /**
     * Builds a regex fragment that matches any single character out of $s.
     *
     * Characters are collected in one bracket expression; pieces of three or
     * more bytes that "UTF8::strlen()" does not report as a single character
     * become separate alternatives of a non-capturing group. Results are
     * cached per ($s, $class) combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class [optional] <p>Initial content of the bracket expression.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASSS_CACHE = [];

        // The length prefix keeps the key unambiguous: plain "$s . $class" is
        // identical for e.g. ('a-', 'z') and ('', 'a-z'), which yield different results.
        $cacheKey = \strlen($s) . ':' . $s . $class;

        if (isset($RX_CLASSS_CACHE[$cacheKey])) {
            return $RX_CLASSS_CACHE[$cacheKey];
        }

        $class = [$class];

        // iterate by value: the result of a function call cannot be used as a
        // reference target, and the parameter $s must not be overwritten
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" has to be the first char of the bracket expression
                $class[0] = '-' . $class[0];
            } elseif (!isset($char[2])) {
                // at most two bytes
                $class[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class[0] .= $char;
            } else {
                $class[] = $char;
            }
        }

        if ($class[0]) {
            $class[0] = '[' . $class[0] . ']';
        }

        if (\count($class) === 1) {
            $return = $class[0];
        } else {
            $return = '(?:' . \implode('|', $class) . ')';
        }

        $RX_CLASSS_CACHE[$cacheKey] = $return;

        return $return;
    }
12222
12223
    /**
     * Upper-case the first character of every part of a personal name.
     *
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly
     * using lowercase ("marcus aurelius"). $names is split at $delimiter and
     * each part is passed through self::ucfirst(), except for parts that
     *  - are exactly one of the listed particles ("von", "de", ...), or
     *  - start with one of the listed prefixes ("mc", "d'", ...), or
     *  - start with one of the listed particles, when $delimiter is "-".
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator used to split and re-join $names.</p>
     * @param string $encoding  <p>Forwarded to self::strpos() for the prefix checks.</p>
     *
     * @return string <p>The re-joined name, or "" if $names could not be split.</p>
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // init
        $namesArray = \explode($delimiter, $names);

        // explode() returns false for an empty delimiter on PHP < 8.
        if ($namesArray === false) {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that protect a part from being capitalized. The particles
        // only count as beginnings when the parts were split at a hyphen.
        $protectedBeginnings = $specialCases['prefixes'];
        if ($delimiter === '-') {
            $protectedBeginnings = \array_merge($specialCases['names'], $protectedBeginnings);
        }

        // Write back by index instead of iterating by reference, so that no
        // dangling reference is left behind after the loop.
        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            foreach ($protectedBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // One match is enough: leave this part untouched.
                    continue 2;
                }
            }

            $namesArray[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $namesArray);
    }
12313
12314
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The input is decomposed to Unicode normalization form D and every run
     * of non-spacing marks (\p{Mn}) is removed afterwards, so that e.g. "é"
     * becomes "e". The letter case is left untouched.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>The folded string, "" if the input could not be normalized,
     *                     or null if preg_replace() reports an error.</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() signals failure with false; handing that to preg_replace()
        // is a TypeError under strict_types on PHP 8. PHP 7 silently treated it
        // as an empty subject, so keep returning an empty string.
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12325
12326
    /**
     * Convert one input unit into its UTF-8 byte sequence.
     *
     * The value of $input is looked up in self::$ORD; if self::$WIN1252_TO_UTF8
     * has an entry for the result, that entry is returned. Otherwise a two-byte
     * sequence is assembled from the looked-up value and the input itself.
     * The three lookup tables are loaded via self::getData() on first use.
     *
     * @param int|string $input <p>The unit to convert; presumably a single byte - TODO confirm against the callers.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The "|" and "&" below are intentional: applied to two strings
            // they combine the operands byte by byte.
            //
            // Truncate explicitly: "/" yields a float for values that are not
            // a multiple of 64, and using a fractional float as an array key
            // is deprecated since PHP 8.1.
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12359
}
12360