Passed
Push — master ( 56737a...e4c7b5 )
by Lars
06:16
created

UTF8::str_snakeize()   B

Complexity

Conditions 6
Paths 3

Size

Total Lines 55
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 23
CRAP Score 6.0184

Importance

Changes 0
Metric Value
cc 6
eloc 28
nc 3
nop 2
dl 0
loc 55
ccs 23
cts 25
cp 0.92
crap 6.0184
rs 8.8497
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

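Commonly applied refactorings include Extract Method: move a cohesive (often commented) section of the method body into its own, well-named method.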

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Byte order mark => length of that mark in bytes.
     *
     * Every BOM is listed twice: once as its raw bytes and once in the form it
     * takes after the raw bytes were mis-read as "WINDOWS-1252" and re-encoded
     * as UTF-8 (each byte >= 0x80 then occupies two bytes, NUL stays one byte).
     * The mis-decoded keys are written as explicit byte escapes so they survive
     * editors and tools that strip BOM-like or NUL characters.
     *
     * NOTE(review): the 4-byte UTF-32 entries are listed before the 2-byte
     * UTF-16 entries they start with; presumably consumers rely on that order
     * for prefix matching - keep it when editing.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $ENCODINGS;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ORD;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $CHR;
214
215
    /**
     * Public no-op constructor: it takes no arguments and initialises nothing.
     */
    public function __construct()
    {
        // intentionally empty
    }
221
222
    /**
     * Fetches the character located at a given position: $str[1] like
     * functionality, but counting characters instead of bytes.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of the character to return; a negative position yields ''.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the single (multi-byte) character, or '' for empty input or a negative position
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        if ($pos < 0 || $str === '') {
            return '';
        }

        // "UTF-8" goes straight to mbstring, every other charset through self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
243
244
    /**
     * Makes sure the string starts with the UTF-8 BOM.
     *
     * INFO: If self::string_has_bom() already reports a BOM, the input is
     * returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str) === false
            ? self::bom() . $str
            : $str;
    }
261
262
    /**
     * Changes the case of all keys in an array.
     *
     * Values are copied over unchanged. If two keys become identical after the
     * case change, the later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value is
     *                         treated as <strong>CASE_LOWER</strong></p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array an array with its keys lower or uppercased
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        if ($case !== \CASE_LOWER && $case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // Iterate by value: the values are only read, so a by-reference loop would
        // merely force a copy of the input and leave a dangling reference behind.
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
293
294
    /**
     * Returns the text found between the first $start delimiter (searched from
     * $offset) and the next $end delimiter after it.
     *
     * An empty string is returned when either delimiter is missing or when
     * $end directly follows $start.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // Decide once, before normalisation: only a literal "UTF-8" takes the
        // direct mbstring route, everything else uses the self::* wrappers.
        $useMb = $encoding === 'UTF-8';
        if (!$useMb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $startPos = $useMb
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        $startLength = $useMb
            ? \mb_strlen($start)
            : self::strlen($start, $encoding);
        $from = $startPos + (int) $startLength;

        $endPos = $useMb
            ? \mb_strpos($str, $end, $from)
            : self::strpos($str, $end, $from, $encoding);
        if ($endPos === false || $endPos === $from) {
            return '';
        }

        $length = $endPos - $from;

        return (string) (
            $useMb
                ? \mb_substr($str, $from, $length)
                : self::substr($str, $from, $length, $encoding)
        );
    }
357
358
    /**
     * Converts a string of binary digits ("0" / "1") into the bytes it encodes.
     *
     * The digits are converted to hexadecimal with base_convert() and then
     * packed into bytes. An input without a first element, or one whose value
     * is zero, yields an empty string.
     *
     * NOTE(review): per the PHP manual base_convert() may lose precision on
     * large numbers, so long inputs are presumably not converted exactly -
     * confirm before relying on this for more than a few bytes.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $hex = \base_convert($bin, 2, 16);
        if ($hex === '0') {
            return '';
        }

        // pack('H*') reads two hex digits per byte and pads a dangling last digit
        // on the right ("a" => 0xa0). Pad on the left instead so that a leading
        // byte below 0x10 keeps its value ("a" => 0x0a).
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
378
379
    /**
     * Provides the UTF-8 Byte Order Mark as a raw byte string (EF BB BF).
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        return "\xef\xbb\xbf";
    }
390
391
    /**
     * @alias of UTF8::chr_map()
     *
     * Forwards both arguments unchanged and returns whatever UTF8::chr_map() returns.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
405
406
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * Unlike UTF8::access(), the index is passed on without any range check.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
423
424
    /**
     * Splits the string into its characters; a thin wrapper around
     * UTF8::str_split() called with its default chunk length.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $chars = self::str_split($str);

        return $chars;
    }
435
436
    /**
     * Probes the server environment once and records the results in self::$SUPPORT.
     *
     * The first call sets the "already_checked_via_portable_utf8" flag; every
     * later call returns immediately. When mbstring (or the symfony polyfill)
     * is detected, the mbstring internal encoding is switched to UTF-8.
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();
        // start with an empty list, filled below only if intl can provide it
        self::$SUPPORT['intl__transliterator_list_ids'] = [];
        if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids') === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }
    }
495
496
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoised per code point and encoding in a function-local
     * static cache. Code points up to 127 are read from the "chr" lookup table,
     * larger ones are produced by IntlChar when available and otherwise
     * assembled byte by byte. For any encoding other than UTF-8 the character
     * is finally passed through UTF8::encode().
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            // IntlChar::chr() treats a string argument as a UTF-8 character, not
            // as a number, so a numeric string such as "8364" must be turned into
            // an int first (the vanilla fallback below already casts).
            if (\is_string($code_point) && \is_numeric($code_point)) {
                $code_point = (int) $code_point;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = (array) self::getData('chr');
        }

        $code_point = (int) $code_point;
        if ($code_point <= 0x7F) {
            // 1 byte: 0xxxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
608
609
    /**
     * Runs a callback on every character of a string and collects the results.
     *
     * The string is split with UTF8::str_split() and the pieces are handed to
     * array_map() one by one.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
624
625
    /**
     * Lists the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // With mbstring function overloading active, measure through mb_strlen()
        // in an 8-bit charset instead of plain strlen().
        $byteLength = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
            : '\strlen';

        return \array_map($byteLength, self::str_split($str));
    }
655
656
    /**
     * Get a decimal code representation of a specific character.
     *
     * The first byte decides how many bytes are read (1 to 4); the payload bits
     * of the lead byte and of each following byte are then joined, 6 bits per
     * following byte. A lead byte matching none of the multi-byte patterns is
     * returned as-is.
     *
     * NOTE(review): assumes UTF8::ord() yields the byte value when given a
     * single byte - confirm against UTF8::ord().
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $value = self::ord($char[0]);

        if (($value & 0x80) === 0) {
            // 0xxxxxxx
            return $value;
        }

        $length = 1;
        if (($value & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $value &= ~0xc0;
        } elseif (($value & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $value &= ~0xe0;
        } elseif (($value & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $value &= ~0xf0;
        }

        for ($offset = 1; $offset < $length; ++$offset) {
            // 10xxxxxx
            $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $value;
    }
694
695
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty input returns an empty string. The literal "&#0;" is replaced by
     * an empty string before it is handed to UTF8::ord().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $input = $char === '&#0;' ? '' : $char;
        $codePoint = self::ord((string) $input);

        return self::int_to_hex($codePoint, $pfix);
    }
715
716
    /**
     * alias for "UTF8::chr_to_decimal()"
     *
     * Forwards the argument unchanged and returns the result as-is.
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
729
730
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * INFO: the chunks are joined with implode(), so $end is placed between the
     * chunks only; nothing is appended after the last chunk.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
743
744
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * Byte sequences that are not well-formed are always stripped first; the
     * optional steps then run in this fixed order: diamond question mark,
     * invisible characters, whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences and is written back;
        // the two single-byte alternatives match stray bytes, which are dropped.
        $validUtf8Pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($validUtf8Pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
811
812
    /**
     * Cleans up a string so that only printable UTF-8 chars remain + fixes UTF-8 encoding.
     *
     * The input is cast to string; an empty result is returned right away.
     * Otherwise UTF8::fix_simple_utf8() runs first and its output is handed to
     * UTF8::clean() with a fixed set of flags.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // do not normalize MS Word chars
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
846
847
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is first split with UTF8::str_split(); an array is used as given.
     * Each element is then mapped through UTF8::ord() and, for $u_style, through
     * UTF8::int_to_hex() with its default prefix.
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        $codePoints = \array_map([self::class, 'ord'], $chars);
        if (\count($codePoints) === 0) {
            return [];
        }

        if ($u_style !== true) {
            return $codePoints;
        }

        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
891
892
    /**
     * Trim the string and collapse every run of whitespace into a single space.
     *
     * A "run of whitespace" is whatever the pattern "[[:space:]]+" matches. According to the
     * original documentation this covers tabs and newlines as well as multibyte whitespace
     * such as the thin space and the ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>The trimmed string with condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        $whitespaceRun = '[[:space:]]+';

        // Without mbstring, fall back to the library's own regex helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, $whitespaceRun, ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace($whitespaceRun, ' ', $str);

        return \trim($collapsed);
    }
910
911
    /**
     * Count how often each character occurs in a string.
     *
     * The string is split into single characters via str_split() and the pieces are
     * tallied with array_count_values().
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string
     *                                   (forwarded to str_split()).</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Forwarded unchanged to str_split();
     *                                   presumably set it to false to avoid the "mb_" functions
     *                                   there (verify against str_split()).</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($characters);
    }
935
936
    /**
     * Remove CSS media-query blocks ("@media ... { ... }") from a string.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @return string <p>The input without the matched media-query blocks. If preg_replace()
     *                fails (null), the cast turns the result into an empty string.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // Modifiers: m = multiline, i = case-insensitive, s = dot matches newline, U = ungreedy.
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
951
952
    /**
     * Report whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $hasCtype = \extension_loaded('ctype');

        return $hasCtype;
    }
962
963
    /**
     * Convert a decimal code point into the matching UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * decoded via html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point; it is concatenated into the entity as-is.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numericEntity = '&#' . $int . ';';
        $decodeFlags = \ENT_QUOTES | \ENT_HTML5;

        return self::html_entity_decode($numericEntity, $decodeFlags);
    }
974
975
    /**
     * Decode a MIME header field.
     *
     * Uses iconv_mime_decode() when iconv is available (continuing on errors); otherwise
     * falls back to mb_decode_mimeheader().
     *
     * @param string $str      <p>The encoded header.</p>
     * @param string $encoding [optional] <p>Target charset; anything other than "UTF-8" / "CP850"
     *                         is first passed through normalize_encoding().</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // mbstring fallback: for a non UTF-8 target the raw header is re-encoded first.
        $header = $encoding !== 'UTF-8' ? self::encode($encoding, $str) : $str;

        return \mb_decode_mimeheader($header);
    }
1001
1002
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO: In contrast to "UTF8::utf8_encode()" this method also tries to repair broken /
     *       double encoding, so it can safely be called on a string that is already UTF-8.
     *
     * Besides real charsets, the pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES" are
     * handled, both as target and as source.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the source encoding is always
     *                                       detected via str_detect_encoding(); a detected value
     *                                       replaces $fromEncoding, and if nothing is detected the
     *                                       result of to_utf8() is returned.<br>
     *                                       If false, detection only runs when $fromEncoding is
     *                                       empty.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is "JSON" and json_encode() returns false
     *
     * @return string <p>The converted string; the input is returned unchanged if it or
     *                $toEncoding is empty, if source and target encoding are equal, if no source
     *                encoding could be determined, or if every conversion attempt failed.</p>
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // These two names are used as-is; everything else gets normalized.
        $wellKnown = ['UTF-8', 'CP850'];

        if (\in_array($toEncoding, $wellKnown, true) === false) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && \in_array($fromEncoding, $wellKnown, true) === false) {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // Nothing to do when source and target are known and identical.
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // --- pseudo encoding: JSON ---
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // --- pseudo encoding: BASE64 ---
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // --- pseudo encoding: HTML-ENTITIES ---
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // Detect the source encoding when asked to, or when none is known.
        $detected = ($autodetectFromEncoding === true || !$fromEncoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $detected, $str, "\n\n");

        if ($detected !== false) {
            $fromEncoding = $detected;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // Shortcuts handled by the library's own converters.
        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // Any other target needs mbstring; warn if it is missing, then still try iconv below.
        if (
            self::$SUPPORT['mbstring'] === false
            &&
            \in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            // A falsy result (false, '' or '0') falls through to iconv.
            if ($converted) {
                return $converted;
            }
        }

        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        return $viaIconv !== false ? $viaIconv : $str;
    }
1160
1161
    /**
     * Encode a string as a MIME header field value via iconv_mime_encode().
     *
     * The field name passed to iconv_mime_encode() is always the empty string.
     *
     * @param string $str              <p>The header value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding (iconv option "scheme").</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed (iconv option "line-break-chars").</p>
     * @param int    $indent           [optional] <p>Set the max line length (iconv option "line-length").</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // These two charset names are used as-is; everything else gets normalized.
        $wellKnown = ['UTF-8', 'CP850'];

        if (\in_array($fromCharset, $wellKnown, true) === false) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (\in_array($toCharset, $wellKnown, true) === false) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1201
1202
    /**
     * Create an extract of a text; if the search string is found, the extract is roughly
     * centered around it.
     *
     * Cuts are made at the next space / period, the pieces are trimmed of punctuation and
     * skipped text is marked with $replacerForSkippedText.
     *
     * NOTE(review): every cut position is computed as min()/max() over two strpos()-style
     * results. If one of the two delimiters (" " or ".") is not found, min() yields false and
     * the position becomes 0, which makes the code keep the text instead of cutting it.
     * This looks unintended, but it is the existing behaviour and is preserved here.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 uses the plain "mb_" functions, everything else the encoding-aware wrappers.
        $isUtf8 = $encoding === 'UTF-8';

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // ---------------------------------------------------------------
        // No search term: keep the head of the text, cut at a delimiter.
        // ---------------------------------------------------------------
        if ($search === '') {
            $from = 0;
            if ($length > 0) {
                $total = $isUtf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $from = \min($length - 1, $total);
            }

            $cut = $isUtf8
                ? (int) \min(\mb_strpos($str, ' ', $from), \mb_strpos($str, '.', $from))
                : (int) \min(self::strpos($str, ' ', $from, $encoding), self::strpos($str, '.', $from, $encoding));

            if (!$cut) {
                return $str;
            }

            $head = $isUtf8 ? \mb_substr($str, 0, $cut) : self::substr($str, 0, $cut, $encoding);
            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trimChars) . $replacerForSkippedText;
        }

        // ---------------------------------------------------------------
        // With search term: try to center the extract around the hit.
        // ---------------------------------------------------------------
        if ($isUtf8) {
            $hit = (int) \mb_stripos($str, $search);
            $half = (int) ($hit - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $hit = (int) self::stripos($str, $search, 0, $encoding);
            $half = (int) ($hit - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // Start at the last delimiter in front of the "half" position (0 if there is none).
        $start = 0;
        if ($half > 0) {
            $lead = $isUtf8 ? \mb_substr($str, 0, $half) : self::substr($str, 0, $half, $encoding);

            if ($lead !== false) {
                $start = $isUtf8
                    ? (int) \max(\mb_strrpos($lead, ' '), \mb_strrpos($lead, '.'))
                    : (int) \max(self::strrpos($lead, ' ', 0, $encoding), self::strrpos($lead, '.', 0, $encoding));
            }
        }

        $fullLength = (int) self::strlen($str, $encoding);

        if ($hit && $half > 0) {
            // The hit is far enough into the text: skip the beginning.
            $offset = \min($start + $length - 1, $fullLength);

            $span = ($isUtf8
                ? (int) \min(\mb_strpos($str, ' ', $offset), \mb_strpos($str, '.', $offset))
                : (int) \min(self::strpos($str, ' ', $offset, $encoding), self::strpos($str, '.', $offset, $encoding))
            ) - $start;

            if ($span <= 0) {
                // No usable end position: take everything from $start to the end.
                $tail = $isUtf8
                    ? \mb_substr($str, $start, (int) \mb_strlen($str))
                    : self::substr($str, $start, (int) self::strlen($str, $encoding), $encoding);

                return $tail !== false
                    ? ($replacerForSkippedText . \ltrim($tail, $trimChars))
                    : '';
            }

            $middle = $isUtf8
                ? \mb_substr($str, $start, $span)
                : self::substr($str, $start, $span, $encoding);

            return $middle !== false
                ? ($replacerForSkippedText . \trim($middle, $trimChars) . $replacerForSkippedText)
                : '';
        }

        // The hit is at / near the beginning (or absent): keep the head of the text.
        $offset = \min($length - 1, $fullLength);

        $end = $isUtf8
            ? (int) \min(\mb_strpos($str, ' ', $offset), \mb_strpos($str, '.', $offset))
            : (int) \min(self::strpos($str, ' ', $offset, $encoding), self::strpos($str, '.', $offset, $encoding));

        if (!$end) {
            return $str;
        }

        $head = $isUtf8 ? \mb_substr($str, 0, $end) : self::substr($str, 0, $end, $encoding);

        return $head !== false
            ? (\rtrim($head, $trimChars) . $replacerForSkippedText)
            : '';
    }
1389
1390
    /**
     * Reads entire file into a string and, optionally, converts it to clean UTF-8.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. It is passed through
     *                                        filter_var(..., FILTER_SANITIZE_STRING) before use.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to the native file_get_contents().
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is
     *                                        non-zero, a context with that "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null means 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout; only used when no
     *                                        $context is given.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that is
     *                                        detected as binary (and is not UTF-16 / UTF-32) is not converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);

        // Only build a timeout context when the caller did not bring one.
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // The length argument must be omitted entirely when no limit is wanted.
        $data = \is_int($maxLength) === true
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // Binary data that is neither UTF-16 nor UTF-32 is returned untouched.
        $isPlainBinary = self::is_binary($data, true) === true
            && self::is_utf16($data, false) === false
            && self::is_utf32($data, false) === false;

        if ($isPlainBinary) {
            return $data;
        }

        $utf8 = self::encode('UTF-8', $data, false, $fromEncoding);

        return self::cleanup($utf8);
    }
1483
1484
    /**
     * Check whether a file starts with a BOM (Byte Order Mark).
     *
     * The whole file is read with the native file_get_contents() and the content is
     * tested with string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1503
1504
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively (objects via foreach over their properties),
     * strings are normalized, every other type is returned unchanged.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>A \Normalizer form constant.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which starts
     *                                   with a combining mark (\p{Mn}); an empty string disables
     *                                   that step.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        if ($type === 'array' || $type === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // Break the reference so later writes cannot hit the last element.
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization.
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Non-empty marker: "normalization is fine", so the isset() check below passes.
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // No usable normalizer result: fall back to re-encoding the string as UTF-8.
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        if (
            $var[0] >= "\x80"
            &&
            isset($normalized[0], $leading_combining[0])
            &&
            \preg_match('/^\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1568
1569
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it, and passes the
     * result through UTF8::filter().
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              Only forwarded when the caller actually passed a fourth argument.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Distinguish "no options passed" from "null passed explicitly".
        $raw = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($raw);
    }
1611
1612
    /**
1613
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1614
     *
1615
     * Gets external variables and optionally filters them
1616
     *
1617
     * @see  http://php.net/manual/en/function.filter-input-array.php
1618
     *
1619
     * @param int   $type       <p>
1620
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1621
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1622
     *                          <b>INPUT_ENV</b>.
1623
     *                          </p>
1624
     * @param mixed $definition [optional] <p>
1625
     *                          An array defining the arguments. A valid key is a string
1626
     *                          containing a variable name and a valid value is either a filter type, or an array
1627
     *                          optionally specifying the filter, flags and options. If the value is an
1628
     *                          array, valid keys are filter which specifies the
1629
     *                          filter type,
1630
     *                          flags which specifies any flags that apply to the
1631
     *                          filter, and options which specifies any options that
1632
     *                          apply to the filter. See the example below for a better understanding.
1633
     *                          </p>
1634
     *                          <p>
1635
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1636
     *                          input array are filtered by this filter.
1637
     *                          </p>
1638
     * @param bool  $add_empty  [optional] <p>
1639
     *                          Add missing keys as <b>NULL</b> to the return value.
1640
     *                          </p>
1641
     *
1642
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1643
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1644
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1645
     *               is not set and <b>NULL</b> if the filter fails.
1646
     */
1647
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // Forward only what the caller actually supplied, so that the native
        // defaults of "filter_input_array()" apply when just $type was given.
        $input = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // The raw result is always passed through self::filter() before it is returned.
        return self::filter($input);
    }
1657
1658
    /**
1659
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1660
     *
1661
     * Filters a variable with a specified filter
1662
     *
1663
     * @see  http://php.net/manual/en/function.filter-var.php
1664
     *
1665
     * @param mixed $variable <p>
1666
     *                        Value to filter.
1667
     *                        </p>
1668
     * @param int   $filter   [optional] <p>
1669
     *                        The ID of the filter to apply. The
1670
     *                        manual page lists the available filters.
1671
     *                        </p>
1672
     * @param mixed $options  [optional] <p>
1673
     *                        Associative array of options or bitwise disjunction of flags. If filter
1674
     *                        accepts options, flags can be provided in "flags" field of array. For
1675
     *                        the "callback" filter, callable type should be passed. The
1676
     *                        callback must accept one argument, the value to be filtered, and return
1677
     *                        the value after filtering/sanitizing it.
1678
     *                        </p>
1679
     *                        <p>
1680
     *                        <code>
1681
     *                        // for filters that accept options, use this format
1682
     *                        $options = array(
1683
     *                        'options' => array(
1684
     *                        'default' => 3, // value to return if the filter fails
1685
     *                        // other options here
1686
     *                        'min_range' => 0
1687
     *                        ),
1688
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1689
     *                        );
1690
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1691
     *                        // for filter that only accept flags, you can pass them directly
1692
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1693
     *                        // for filter that only accept flags, you can also pass as an array
1694
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1695
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1696
     *                        // callback validate filter
1697
     *                        function foo($value)
1698
     *                        {
1699
     *                        // Expected format: Surname, GivenNames
1700
     *                        if (strpos($value, ", ") === false) return false;
1701
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1702
     *                        $empty = (empty($surname) || empty($givennames));
1703
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1704
     *                        if ($empty || $notstrings) {
1705
     *                        return false;
1706
     *                        } else {
1707
     *                        return $value;
1708
     *                        }
1709
     *                        }
1710
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1711
     *                        </code>
1712
     *                        </p>
1713
     *
1714
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1715
     */
1716 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Only hand $options to the native function when the caller passed it explicitly.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        // The native result is post-processed by self::filter().
        return self::filter($filtered);
    }
1726
1727
    /**
1728
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1729
     *
1730
     * Gets multiple variables and optionally filters them
1731
     *
1732
     * @see  http://php.net/manual/en/function.filter-var-array.php
1733
     *
1734
     * @param array $data       <p>
1735
     *                          An array with string keys containing the data to filter.
1736
     *                          </p>
1737
     * @param mixed $definition [optional] <p>
1738
     *                          An array defining the arguments. A valid key is a string
1739
     *                          containing a variable name and a valid value is either a
1740
     *                          filter type, or an
1741
     *                          array optionally specifying the filter, flags and options.
1742
     *                          If the value is an array, valid keys are filter
1743
     *                          which specifies the filter type,
1744
     *                          flags which specifies any flags that apply to the
1745
     *                          filter, and options which specifies any options that
1746
     *                          apply to the filter. See the example below for a better understanding.
1747
     *                          </p>
1748
     *                          <p>
1749
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1750
     *                          input array are filtered by this filter.
1751
     *                          </p>
1752
     * @param bool  $add_empty  [optional] <p>
1753
     *                          Add missing keys as <b>NULL</b> to the return value.
1754
     *                          </p>
1755
     *
1756
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1757
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1758
     *               set
1759
     */
1760 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native defaults for definition / add_empty are used.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        // The native result is post-processed by self::filter().
        return self::filter($filtered);
    }
1770
1771
    /**
1772
     * Checks whether finfo is available on the server.
1773
     *
1774
     * @return bool
1775
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1776
     */
1777
    public static function finfo_loaded(): bool
    {
        // "finfo" counts as available when its class can be resolved.
        $isAvailable = \class_exists('finfo');

        return $isAvailable;
    }
1781
1782
    /**
1783
     * Returns the first $n characters of the string.
1784
     *
1785
     * @param string $str      <p>The input string.</p>
1786
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1787
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1788
     *
1789
     * @return string
1790
     */
1791 13
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // Nothing to extract from an empty string or for a non-positive count.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; every other charset uses the class' own substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1803
1804
    /**
1805
     * Check if the number of unicode characters are not more than the specified integer.
1806
     *
1807
     * @param string $str      the original string to be checked
1808
     * @param int    $box_size the size in number of chars to be checked against string
1809
     *
1810
     * @return bool true if string is less than or equal to $box_size, false otherwise
1811
     */
1812 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by the class' own strlen().
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1816
1817
    /**
1818
     * Try to fix simple broken UTF-8 strings.
1819
     *
1820
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1821
     *
1822
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1823
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1824
     * See: http://en.wikipedia.org/wiki/Windows-1252
1825
     *
1826
     * @param string $str <p>The input string</p>
1827
     *
1828
     * @return string
1829
     */
1830 42
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Search / replace lists are built once and memoised in function-level statics.
        static $search = null;
        static $replace = null;

        if ($search === null) {
            // Lazily load the shared "utf8_fix" table the first time it is needed.
            $table = self::$BROKEN_UTF8_FIX
                ?? (self::$BROKEN_UTF8_FIX = self::getData('utf8_fix'));

            $search = \array_keys($table);
            $replace = \array_values($table);
        }

        return \str_replace($search, $replace, $str);
    }
1850
1851
    /**
1852
     * Fix a double (or multiple) encoded UTF8 string.
1853
     *
1854
     * @param string|string[] $str you can use a string or an array of strings
1855
     *
1856
     * @return string|string[]
1857
     *                         Will return the fixed input-"array" or
1858
     *                         the fixed input-"string"
1859
     *
1860
     * @psalm-suppress InvalidReturnType
1861
     */
1862 2
    public static function fix_utf8($str)
    {
        // Arrays are processed element by element (recursively); keys are kept.
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Repeat the decode / re-encode round trip until a pass leaves the string unchanged.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($current, true);
            $current = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1893
1894
    /**
1895
     * Get character of a specific character.
1896
     *
1897
     * @param string $char
1898
     *
1899
     * @return string 'RTL' or 'LTR'
1900
     */
1901 2
    public static function getCharDirection(string $char): string
    {
        // Preferred path: use "IntlChar" when it is flagged as supported.
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $direction = \IntlChar::charDirection($char);

            // direction codes taken from the "IntlChar"-Class
            $ltrCodes = [0, 11, 12, 20];
            $rtlCodes = [1, 13, 14, 15, 21];

            if (\in_array($direction, $ltrCodes, true)) {
                return 'LTR';
            }

            if (\in_array($direction, $rtlCodes, true)) {
                return 'RTL';
            }

            // Any other code falls through to the code-point tables below.
        }

        $c = static::chr_to_decimal($char);

        // Everything outside of [0x5be, 0x10b7f] is treated as left-to-right.
        if ($c < 0x5be || $c > 0x10b7f) {
            return 'LTR';
        }

        if ($c <= 0x85e) {
            // Lower block: single code points and inclusive [from, to] ranges.
            $singles = [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ];
            $ranges = [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ];
        } elseif ($c === 0x200f) {
            return 'RTL';
        } elseif ($c >= 0xfb1d) {
            // Upper block: single code points and inclusive [from, to] ranges.
            $singles = [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ];
            $ranges = [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // Between the two blocks only 0x200f (handled above) is right-to-left.
            return 'LTR';
        }

        // Single code points are matched strictly, exactly like an "===" comparison.
        if (\in_array($c, $singles, true)) {
            return 'RTL';
        }

        foreach ($ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2007
2008
    /**
2009
     * Check for php-support.
2010
     *
2011
     * @param string|null $key
2012
     *
2013
     * @return mixed
2014
     *               Return the full support-"array", if $key === null<br>
2015
     *               return bool-value, if $key is used and available<br>
2016
     *               otherwise return <strong>null</strong>
2017
     */
2018 26
    public static function getSupportInfo(string $key = null)
    {
        // No key: hand back the complete support table.
        // With a key: its entry, or null when the entry is missing (or itself null).
        return $key === null
            ? self::$SUPPORT
            : (self::$SUPPORT[$key] ?? null);
    }
2030
2031
    /**
2032
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
2033
     *          if you need more supported types, please use e.g. "finfo"
2034
     *
2035
     * @param string $str
2036
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
2037
     *
2038
     * @return array
2039
     *               with this keys: 'ext', 'mime', 'type'
2040
     */
2041 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // A signature needs two bytes; shorter (or empty) input cannot be classified.
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // Known two-byte signatures => [extension, mime type].
        //
        // The raw bytes are compared directly. Building a number by concatenating
        // the decimal byte values is ambiguous: the bytes (7, 173) and (71, 73)
        // both yield "7173", so arbitrary binary data could be reported as GIF.
        $signatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        $prefix = \substr($str, 0, 2);
        if (!isset($signatures[$prefix])) {
            return $fallback;
        }

        return [
            'ext'  => $signatures[$prefix][0],
            'mime' => $signatures[$prefix][1],
            'type' => 'binary',
        ];
    }
2129
2130
    /**
2131
     * @param int    $length        <p>Length of the random string.</p>
2132
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2133
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2134
     *
2135
     * @return string
2136
     */
2137 1
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // The fast path is decided by the encoding exactly as passed in (before normalisation).
        $useMbstring = $encoding === 'UTF-8';

        if ($useMbstring) {
            $poolSize = (int) \mb_strlen($possibleChars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $poolSize = (int) self::strlen($possibleChars, $encoding);
        }

        // Without any candidate characters there is nothing to pick from.
        if ($poolSize === 0) {
            return '';
        }

        $result = '';
        $picked = 0;

        // Only successful picks are counted, so a failed substr() is simply retried.
        while ($picked < $length) {
            try {
                $index = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $poolSize - 1);
            }

            $char = $useMbstring
                ? \mb_substr($possibleChars, $index, 1)
                : self::substr($possibleChars, $index, 1, $encoding);

            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2191
2192
    /**
2193
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2194
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2195
     *
2196
     * @return string
2197
     */
2198 1
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // Entropy sources, concatenated in this order: random int, session id,
        // remote / server address (when present) and the caller-supplied extra value.
        $seed = \implode(
            '',
            [
                \random_int(0, \mt_getrandmax()),
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $entropyExtra,
            ]
        );

        $unique = \uniqid($seed, true);

        // Optionally condense the identifier into an md5 hash.
        return $md5 ? \md5($unique . $seed) : $unique;
    }
2214
2215
    /**
2216
     * alias for "UTF8::string_has_bom()"
2217
     *
2218
     * @see        UTF8::string_has_bom()
2219
     *
2220
     * @param string $str
2221
     *
2222
     * @return bool
2223
     *
2224
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
2225
     */
2226 2
    public static function hasBom(string $str): bool
    {
        // Deprecated alias: the check itself lives in string_has_bom().
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2230
2231
    /**
2232
     * Returns true if the string contains a lower case char, false otherwise.
2233
     *
2234
     * @param string $str <p>The input string.</p>
2235
     *
2236
     * @return bool whether or not the string contains a lower case character
2237
     */
2238 47
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // Without mbstring the class' own pattern matcher is used instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2247
2248
    /**
2249
     * Returns true if the string contains an upper case char, false otherwise.
2250
     *
2251
     * @param string $str <p>The input string.</p>
2252
     *
2253
     * @return bool whether or not the string contains an upper case character
2254
     */
2255 12
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // Without mbstring the class' own pattern matcher is used instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2264
2265
    /**
2266
     * Converts a hexadecimal-value into an UTF-8 character.
2267
     *
2268
     * @param string $hexdec <p>The hexadecimal value.</p>
2269
     *
2270
     * @return false|string one single UTF-8 character
2271
     */
2272 4
    public static function hex_to_chr(string $hexdec)
    {
        // Hex string -> numeric code -> character (via decimal_to_chr()).
        $code = \hexdec($hexdec);

        return self::decimal_to_chr($code);
    }
2276
2277
    /**
2278
     * Converts hexadecimal U+xxxx code point representation to integer.
2279
     *
2280
     * INFO: opposite to UTF8::int_to_hex()
2281
     *
2282
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
2283
     *
2284
     * @return false|int the code point, or false on failure
2285
     */
2286 2
    public static function hex_to_int($hexDec)
    {
        // Accept any scalar input, but work on its string form.
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
        // Only [a-f0-9] is accepted: with [a-z0-9], input such as "zzzz" matched
        // and was turned into 0 by intval() instead of being reported as a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match) === 1) {
            return \intval($match[1], 16);
        }

        return false;
    }
2301
2302
    /**
2303
     * alias for "UTF8::html_entity_decode()"
2304
     *
2305
     * @see UTF8::html_entity_decode()
2306
     *
2307
     * @param string $str
2308
     * @param int    $flags
2309
     * @param string $encoding
2310
     *
2311
     * @return string
2312
     */
2313 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Pure alias: every argument is forwarded unchanged to html_entity_decode().
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2317
2318
    /**
2319
     * Converts a UTF-8 string to a series of HTML numbered entities.
2320
     *
2321
     * INFO: opposite to UTF8::html_decode()
2322
     *
2323
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2324
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2325
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2326
     *
2327
     * @return string HTML numbered entities
2328
     */
2329 13
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // "UTF-8" and "CP850" are used as given; anything else is normalised first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // When ASCII is kept, only code points from 0x80 upwards are converted.
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A convmap consists of groups of exactly four integers:
            // [start code, end code, offset, mask]. The former fifth element was
            // silently ignored by PHP 7 but makes PHP 8 throw a ValueError.
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2374
2375
    /**
2376
     * UTF-8 version of html_entity_decode()
2377
     *
2378
     * The reason we are not using html_entity_decode() by itself is because
2379
     * while it is not technically correct to leave out the semicolon
2380
     * at the end of an entity most browsers will still interpret the entity
2381
     * correctly. html_entity_decode() does not convert entities without
2382
     * semicolons, so we are left with our own little solution here. Bummer.
2383
     *
2384
     * Convert all HTML entities to their applicable characters
2385
     *
2386
     * INFO: opposite to UTF8::html_encode()
2387
     *
2388
     * @see http://php.net/manual/en/function.html-entity-decode.php
2389
     *
2390
     * @param string $str      <p>
2391
     *                         The input string.
2392
     *                         </p>
2393
     * @param int    $flags    [optional] <p>
2394
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2395
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2396
     *                         <table>
2397
     *                         Available <i>flags</i> constants
2398
     *                         <tr valign="top">
2399
     *                         <td>Constant Name</td>
2400
     *                         <td>Description</td>
2401
     *                         </tr>
2402
     *                         <tr valign="top">
2403
     *                         <td><b>ENT_COMPAT</b></td>
2404
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2405
     *                         </tr>
2406
     *                         <tr valign="top">
2407
     *                         <td><b>ENT_QUOTES</b></td>
2408
     *                         <td>Will convert both double and single quotes.</td>
2409
     *                         </tr>
2410
     *                         <tr valign="top">
2411
     *                         <td><b>ENT_NOQUOTES</b></td>
2412
     *                         <td>Will leave both double and single quotes unconverted.</td>
2413
     *                         </tr>
2414
     *                         <tr valign="top">
2415
     *                         <td><b>ENT_HTML401</b></td>
2416
     *                         <td>
2417
     *                         Handle code as HTML 4.01.
2418
     *                         </td>
2419
     *                         </tr>
2420
     *                         <tr valign="top">
2421
     *                         <td><b>ENT_XML1</b></td>
2422
     *                         <td>
2423
     *                         Handle code as XML 1.
2424
     *                         </td>
2425
     *                         </tr>
2426
     *                         <tr valign="top">
2427
     *                         <td><b>ENT_XHTML</b></td>
2428
     *                         <td>
2429
     *                         Handle code as XHTML.
2430
     *                         </td>
2431
     *                         </tr>
2432
     *                         <tr valign="top">
2433
     *                         <td><b>ENT_HTML5</b></td>
2434
     *                         <td>
2435
     *                         Handle code as HTML 5.
2436
     *                         </td>
2437
     *                         </tr>
2438
     *                         </table>
2439
     *                         </p>
2440
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2441
     *
2442
     * @return string the decoded string
2443
     */
2444 40
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Nothing to decode: strings shorter than four bytes, or without any "&", are returned untouched.
        if (\strlen($str) < 4 || \strpos($str, '&') === false) {
            return $str;
        }

        // "UTF-8" and "CP850" are used as given; every other encoding name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // No explicit flags: decode both quote styles using the HTML5 entity table.
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        $isWellKnownEncoding = $encoding === 'UTF-8'
                               || $encoding === 'ISO-8859-1'
                               || $encoding === 'WINDOWS-1252';
        if ($isWellKnownEncoding === false && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Conversion map for mb_decode_numericentity(): [start, end, offset, mask].
        // The map must contain a multiple of four elements; PHP >= 8.0 throws a ValueError
        // otherwise (older versions silently ignored a surplus element).
        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $numericEntityMap = [0x80, 0xfffff, 0, 0xfffff];

        // Repeat until a pass leaves the string unchanged, so nested entities
        // (e.g. "&amp;lt;") are decoded completely.
        do {
            $previous = $str;

            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    // no explicit encoding: mbstring falls back to its internal encoding
                    $str = \mb_decode_numericentity($str, $numericEntityMap);
                } else {
                    $str = \mb_decode_numericentity($str, $numericEntityMap, $encoding);
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                        // quotes stay encoded here; the native call below decides based on $flags
                        if ($decoded === '"' || $decoded === "'") {
                            return $matches[0];
                        }

                        return $decoded;
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // append the missing ";" to unterminated numeric entities,
                    // decode also numeric & UTF16 two byte entities
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode($str, $flags, $encoding);
            }
        } while ($previous !== $str);

        return $str;
    }
2531
2532
    /**
     * Escape the HTML special characters of a string via "UTF8::htmlspecialchars()".
     *
     * Single and double quotes are both converted (ENT_QUOTES) and the
     * ENT_SUBSTITUTE flag is always set.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2548
2549
    /**
     * Remove empty html-tags, i.e. an opening tag that is directly followed
     * (optionally separated by whitespace only) by a closing tag.
     *
     * e.g.: <tag></tag>
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The string without empty tags; the unmodified input if the
     *                regular expression engine fails (e.g. on invalid UTF-8).</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu",
            '',
            $str
        );

        // preg_replace() yields null on failure; with the "u" modifier that happens
        // for malformed UTF-8 input. Hand back the input instead of an empty string,
        // so the caller's data is never silently thrown away.
        return $stripped ?? $str;
    }
2566
2567
    /**
2568
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2569
     *
2570
     * @see http://php.net/manual/en/function.htmlentities.php
2571
     *
2572
     * @param string $str           <p>
2573
     *                              The input string.
2574
     *                              </p>
2575
     * @param int    $flags         [optional] <p>
2576
     *                              A bitmask of one or more of the following flags, which specify how to handle
2577
     *                              quotes, invalid code unit sequences and the used document type. The default is
2578
     *                              ENT_COMPAT | ENT_HTML401.
2579
     *                              <table>
2580
     *                              Available <i>flags</i> constants
2581
     *                              <tr valign="top">
2582
     *                              <td>Constant Name</td>
2583
     *                              <td>Description</td>
2584
     *                              </tr>
2585
     *                              <tr valign="top">
2586
     *                              <td><b>ENT_COMPAT</b></td>
2587
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2588
     *                              </tr>
2589
     *                              <tr valign="top">
2590
     *                              <td><b>ENT_QUOTES</b></td>
2591
     *                              <td>Will convert both double and single quotes.</td>
2592
     *                              </tr>
2593
     *                              <tr valign="top">
2594
     *                              <td><b>ENT_NOQUOTES</b></td>
2595
     *                              <td>Will leave both double and single quotes unconverted.</td>
2596
     *                              </tr>
2597
     *                              <tr valign="top">
2598
     *                              <td><b>ENT_IGNORE</b></td>
2599
     *                              <td>
2600
     *                              Silently discard invalid code unit sequences instead of returning
2601
     *                              an empty string. Using this flag is discouraged as it
2602
     *                              may have security implications.
2603
     *                              </td>
2604
     *                              </tr>
2605
     *                              <tr valign="top">
2606
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2607
     *                              <td>
2608
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2609
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2610
     *                              string.
2611
     *                              </td>
2612
     *                              </tr>
2613
     *                              <tr valign="top">
2614
     *                              <td><b>ENT_DISALLOWED</b></td>
2615
     *                              <td>
2616
     *                              Replace invalid code points for the given document type with a
2617
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2618
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2619
     *                              instance, to ensure the well-formedness of XML documents with
2620
     *                              embedded external content.
2621
     *                              </td>
2622
     *                              </tr>
2623
     *                              <tr valign="top">
2624
     *                              <td><b>ENT_HTML401</b></td>
2625
     *                              <td>
2626
     *                              Handle code as HTML 4.01.
2627
     *                              </td>
2628
     *                              </tr>
2629
     *                              <tr valign="top">
2630
     *                              <td><b>ENT_XML1</b></td>
2631
     *                              <td>
2632
     *                              Handle code as XML 1.
2633
     *                              </td>
2634
     *                              </tr>
2635
     *                              <tr valign="top">
2636
     *                              <td><b>ENT_XHTML</b></td>
2637
     *                              <td>
2638
     *                              Handle code as XHTML.
2639
     *                              </td>
2640
     *                              </tr>
2641
     *                              <tr valign="top">
2642
     *                              <td><b>ENT_HTML5</b></td>
2643
     *                              <td>
2644
     *                              Handle code as HTML 5.
2645
     *                              </td>
2646
     *                              </tr>
2647
     *                              </table>
2648
     *                              </p>
2649
     * @param string $encoding      [optional] <p>
2650
     *                              Like <b>htmlspecialchars</b>,
2651
     *                              <b>htmlentities</b> takes an optional third argument
2652
     *                              <i>encoding</i> which defines encoding used in
2653
     *                              conversion.
2654
     *                              Although this argument is technically optional, you are highly
2655
     *                              encouraged to specify the correct value for your code.
2656
     *                              </p>
2657
     * @param bool   $double_encode [optional] <p>
2658
     *                              When <i>double_encode</i> is turned off PHP will not
2659
     *                              encode existing html entities. The default is to convert everything.
2660
     *                              </p>
2661
     *
2662
     * @return string
2663
     *                <p>
2664
     *                The encoded string.
2665
     *                <br><br>
2666
     *                If the input <i>string</i> contains an invalid code unit
2667
     *                sequence within the given <i>encoding</i> an empty string
2668
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2669
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2670
     *                </p>
2671
     */
2672 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as-is, anything else is normalized first.
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($isKnownEncoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * The native htmlentities() leaves backslashes alone, because they are mostly
         * used for escaping values on their way into a database. That is not needed
         * here, so every backslash is replaced by its html entity equivalent.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // NOTE(review): the meaning of the second argument (true) is defined by
        // UTF8::html_encode(), which is not visible here — confirm before changing it.
        return self::html_encode($encoded, true, $encoding);
    }
2696
2697
    /**
2698
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2699
     *
2700
     * INFO: Take a look at "UTF8::htmlentities()"
2701
     *
2702
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2703
     *
2704
     * @param string $str           <p>
2705
     *                              The string being converted.
2706
     *                              </p>
2707
     * @param int    $flags         [optional] <p>
2708
     *                              A bitmask of one or more of the following flags, which specify how to handle
2709
     *                              quotes, invalid code unit sequences and the used document type. The default is
2710
     *                              ENT_COMPAT | ENT_HTML401.
2711
     *                              <table>
2712
     *                              Available <i>flags</i> constants
2713
     *                              <tr valign="top">
2714
     *                              <td>Constant Name</td>
2715
     *                              <td>Description</td>
2716
     *                              </tr>
2717
     *                              <tr valign="top">
2718
     *                              <td><b>ENT_COMPAT</b></td>
2719
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2720
     *                              </tr>
2721
     *                              <tr valign="top">
2722
     *                              <td><b>ENT_QUOTES</b></td>
2723
     *                              <td>Will convert both double and single quotes.</td>
2724
     *                              </tr>
2725
     *                              <tr valign="top">
2726
     *                              <td><b>ENT_NOQUOTES</b></td>
2727
     *                              <td>Will leave both double and single quotes unconverted.</td>
2728
     *                              </tr>
2729
     *                              <tr valign="top">
2730
     *                              <td><b>ENT_IGNORE</b></td>
2731
     *                              <td>
2732
     *                              Silently discard invalid code unit sequences instead of returning
2733
     *                              an empty string. Using this flag is discouraged as it
2734
     *                              may have security implications.
2735
     *                              </td>
2736
     *                              </tr>
2737
     *                              <tr valign="top">
2738
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2739
     *                              <td>
2740
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2741
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2742
     *                              string.
2743
     *                              </td>
2744
     *                              </tr>
2745
     *                              <tr valign="top">
2746
     *                              <td><b>ENT_DISALLOWED</b></td>
2747
     *                              <td>
2748
     *                              Replace invalid code points for the given document type with a
2749
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2750
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2751
     *                              instance, to ensure the well-formedness of XML documents with
2752
     *                              embedded external content.
2753
     *                              </td>
2754
     *                              </tr>
2755
     *                              <tr valign="top">
2756
     *                              <td><b>ENT_HTML401</b></td>
2757
     *                              <td>
2758
     *                              Handle code as HTML 4.01.
2759
     *                              </td>
2760
     *                              </tr>
2761
     *                              <tr valign="top">
2762
     *                              <td><b>ENT_XML1</b></td>
2763
     *                              <td>
2764
     *                              Handle code as XML 1.
2765
     *                              </td>
2766
     *                              </tr>
2767
     *                              <tr valign="top">
2768
     *                              <td><b>ENT_XHTML</b></td>
2769
     *                              <td>
2770
     *                              Handle code as XHTML.
2771
     *                              </td>
2772
     *                              </tr>
2773
     *                              <tr valign="top">
2774
     *                              <td><b>ENT_HTML5</b></td>
2775
     *                              <td>
2776
     *                              Handle code as HTML 5.
2777
     *                              </td>
2778
     *                              </tr>
2779
     *                              </table>
2780
     *                              </p>
2781
     * @param string $encoding      [optional] <p>
2782
     *                              Defines encoding used in conversion.
2783
     *                              </p>
2784
     *                              <p>
2785
     *                              For the purposes of this function, the encodings
2786
     *                              ISO-8859-1, ISO-8859-15,
2787
     *                              UTF-8, cp866,
2788
     *                              cp1251, cp1252, and
2789
     *                              KOI8-R are effectively equivalent, provided the
2790
     *                              <i>string</i> itself is valid for the encoding, as
2791
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2792
     *                              the same positions in all of these encodings.
2793
     *                              </p>
2794
     * @param bool   $double_encode [optional] <p>
2795
     *                              When <i>double_encode</i> is turned off PHP will not
2796
     *                              encode existing html entities, the default is to convert everything.
2797
     *                              </p>
2798
     *
2799
     * @return string the converted string.
2800
     *                </p>
2801
     *                <p>
2802
     *                If the input <i>string</i> contains an invalid code unit
2803
     *                sequence within the given <i>encoding</i> an empty string
2804
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2805
     *                <b>ENT_SUBSTITUTE</b> flags are set
2806
     */
2807 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are handed to PHP unchanged, every other name is normalized first.
        $charset = ($encoding === 'UTF-8' || $encoding === 'CP850')
            ? $encoding
            : self::normalize_encoding($encoding, 'UTF-8');

        return \htmlspecialchars($str, $flags, $charset, $double_encode);
    }
2819
2820
    /**
     * Tells whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2830
2831
    /**
     * Alias for "UTF8::decimal_to_chr()": the argument is forwarded unchanged.
     *
     * @see UTF8::decimal_to_chr()
     *
     * @param mixed $int <p>The value that is handed over to "UTF8::decimal_to_chr()".</p>
     *
     * @return string
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2844
2845
    /**
     * Converts an integer to its hexadecimal "U+xxxx" code point representation.
     *
     * The hexadecimal digits are lower-case and left-padded with zeros to a
     * minimum of four digits; longer values are kept as they are.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>The prefix that is put in front of the hex digits.</p>
     *
     * @return string the prefix followed by the hex digits
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        $hexDigits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $hexDigits;
    }
2863
2864
    /**
     * Tells whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $className = 'IntlChar';

        return \class_exists($className);
    }
2874
2875
    /**
     * Tells whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2885
2886
    /**
     * Deprecated camelCase alias: forwards the call to "UTF8::is_ascii()".
     *
     * @see        UTF8::is_ascii()
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool the result of "UTF8::is_ascii()"
     *
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $isAscii = self::is_ascii($str);

        return $isAscii;
    }
2901
2902
    /**
     * Deprecated camelCase alias: forwards the call to "UTF8::is_base64()".
     *
     * @see        UTF8::is_base64()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool the result of "UTF8::is_base64()"
     *
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $isBase64 = self::is_base64($str);

        return $isBase64;
    }
2917
2918
    /**
     * Deprecated camelCase alias: forwards both arguments to "UTF8::is_binary()".
     *
     * @see        UTF8::is_binary()
     *
     * @param mixed $str    <p>The input to check.</p>
     * @param bool  $strict <p>Passed through as the "$strict" argument.</p>
     *
     * @return bool the result of "UTF8::is_binary()"
     *
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $isBinary = self::is_binary($str, $strict);

        return $isBinary;
    }
2934
2935
    /**
     * Deprecated camelCase alias: forwards the call to "UTF8::is_bom()".
     *
     * @see        UTF8::is_bom()
     *
     * @param string $utf8_chr <p>The input string.</p>
     *
     * @return bool the result of "UTF8::is_bom()"
     *
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $isBom = self::is_bom($utf8_chr);

        return $isBom;
    }
2950
2951
    /**
     * Deprecated camelCase alias: forwards the call to "UTF8::is_html()".
     *
     * @see        UTF8::is_html()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool the result of "UTF8::is_html()"
     *
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $isHtml = self::is_html($str);

        return $isHtml;
    }
2966
2967
    /**
     * Deprecated camelCase alias: forwards the call to "UTF8::is_json()".
     *
     * @see        UTF8::is_json()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool the result of "UTF8::is_json()"
     *
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $isJson = self::is_json($str);

        return $isJson;
    }
2982
2983
    /**
     * Deprecated camelCase alias: forwards the call to "UTF8::is_utf16()".
     *
     * @see        UTF8::is_utf16()
     *
     * @param mixed $str <p>The input to check.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $utf16Type = self::is_utf16($str);

        return $utf16Type;
    }
3001
3002
    /**
     * Deprecated camelCase alias: forwards the call to "UTF8::is_utf32()".
     *
     * @see        UTF8::is_utf32()
     *
     * @param mixed $str <p>The input to check.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $utf32Type = self::is_utf32($str);

        return $utf32Type;
    }
3020
3021
    /**
     * Deprecated camelCase alias: forwards both arguments to "UTF8::is_utf8()".
     *
     * @see        UTF8::is_utf8()
     *
     * @param string $str    <p>The string to check.</p>
     * @param bool   $strict <p>Passed through as the "$strict" argument.</p>
     *
     * @return bool the result of "UTF8::is_utf8()"
     *
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $isUtf8 = self::is_utf8($str, $strict);

        return $isUtf8;
    }
3037
3038
    /**
     * Checks whether the string consists of alphabetic chars only.
     *
     * The check uses the pattern "^[[:alpha:]]*$", evaluated by mb_ereg_match()
     * when mbstring support is flagged as available, otherwise by
     * "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3055
3056
    /**
     * Checks whether the string consists of alphabetic and numeric chars only.
     *
     * The check uses the pattern "^[[:alnum:]]*$", evaluated by mb_ereg_match()
     * when mbstring support is flagged as available, otherwise by
     * "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3073
3074
    /**
     * Checks if a string is 7 bit ASCII.
     *
     * Accepted bytes are the printable range 0x20-0x7E plus 0x09, 0x0A, 0x0D,
     * 0x10 and 0x13; any other byte makes the check fail. An empty string is
     * considered ASCII.
     *
     * NOTE(review): "\x10" and "\x13" look like the decimal values of LF / CR
     * written as hex escapes (LF and CR are already listed as "\x0A" / "\x0D").
     * Confirm before touching the pattern, existing callers may rely on it.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        return $str === '' || !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
3091
3092
    /**
     * Checks whether the input is a base64 encoded string.
     *
     * A string passes when strict decoding succeeds and re-encoding the decoded
     * bytes reproduces the input exactly. Non-string input is never valid.
     *
     * @param mixed|string $str                <p>The input string.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_string($str) === false) {
            return false;
        }

        if ($str === '') {
            // only rejected when the flag is exactly "false"
            return $emptyStringIsValid !== false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round-trip: the canonical encoding of the decoded bytes must equal the input
        return \base64_encode($decoded) === $str;
    }
3117
3118
    /**
     * Heuristic check whether the input is binary (this looks like a hack).
     *
     * The input is cast to string and then tested in this order:
     * an empty string is not binary; a string of only "0" / "1" chars is binary;
     * a file type of "binary" reported by "UTF8::get_file_type()" is binary;
     * more than 25% NUL bytes is binary; and, in strict mode only, an encoding
     * of "binary" reported by ext-fileinfo is binary.
     *
     * @param mixed $input  <p>The input to check, it is cast to string.</p>
     * @param bool  $strict [optional] <p>Also ask ext-fileinfo for the encoding.</p>
     *
     * @throws \RuntimeException if $strict is true and ext-fileinfo is flagged as unavailable
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;

        if ($input === '') {
            return false;
        }

        // a "binary number" string such as "010011"
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // share of NUL bytes in the whole input
        $byteLength = \strlen($input);
        $nullByteCount = \substr_count($input, "\x0", 0, $byteLength);
        if (($nullByteCount / $byteLength) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $fileInfo = new \finfo(\FILEINFO_MIME_ENCODING);

        return $fileInfo->buffer($input) === 'binary';
    }
3162
3163
    /**
     * Check if the file is binary, judged by its first 512 bytes.
     *
     * The block that was read is checked with "UTF8::is_binary()" in strict mode.
     * A file that cannot be opened, or that yields an empty block, is not binary.
     *
     * @param string $file <p>Path of the file, passed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (\is_resource($handle) === false) {
            return false;
        }

        $firstBlock = \fread($handle, 512);
        \fclose($handle);

        if ($firstBlock === '') {
            return false;
        }

        return self::is_binary($firstBlock, true);
    }
3187
3188
    /**
     * Checks whether the string consists of whitespace chars only.
     *
     * The check uses the pattern "^[[:space:]]*$", evaluated by mb_ereg_match()
     * when mbstring support is flagged as available, otherwise by
     * "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3205
3206
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is identical to one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // The BOM byte sequences are the keys of self::$BOM. A strict lookup
        // replaces the former by-reference loop, which took references into the
        // shared static array without ever needing to modify it.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3227
3228
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty when it converts to boolean FALSE
     * (e.g. '', '0', 0, null, false or an empty array).
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty
     */
    public static function is_empty($str): bool
    {
        // $str is a declared parameter and therefore always set, so the check
        // reduces to its boolean value.
        return !$str;
    }
3242
3243
    /**
     * Tells whether the string consists solely of hexadecimal characters.
     *
     * The whole string is matched against the POSIX class [[:xdigit:]]; an empty
     * string therefore matches as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains only hexadecimal chars, <strong>false</strong> otherwise
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // without mbstring the class-internal pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3260
3261
    /**
     * Check if the string contains at least one html-tag, e.g. "<b>", "</p>" or "<br />".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if a tag was found, <strong>false</strong> otherwise
     *              (also for an empty string)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // opening or closing tag, optionally with attributes and a trailing slash
        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

        return \preg_match($tagPattern, $str) === 1;
    }
3281
3282
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via UTF8::json_decode() and the decoder's error
     * state is inspected afterwards.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a null result is only legitimate when the input itself was "null"
        $isLiteralNull = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$isLiteralNull) {
            return false;
        }

        $isArrayOrObject = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isArrayOrObject) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3318
3319
    /**
     * Tells whether the string consists solely of lower case characters.
     *
     * The whole string is matched against the POSIX class [[:lower:]]; an empty
     * string therefore matches as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains only lower case characters, <strong>false</strong> otherwise
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // without mbstring the class-internal pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3333
3334
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done by actually unserializing the string. Classes are
     * disabled for that step, so serialized objects are still recognized but
     * are never instantiated and none of their magic methods can run.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // unserialize() returns false for a serialized "false" as well as on
        // failure, so that single value has to be recognized up front.
        if ($str === 'b:0;') {
            return true;
        }

        // unserialize() raises a notice/warning for non-serialized input, which
        // is the expected case here, hence the silence operator.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3353
3354
    /**
     * Tells whether the string consists solely of upper case characters.
     *
     * The whole string is matched against the POSIX class [[:upper:]]; an empty
     * string therefore matches as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str contains only upper case characters, <strong>false</strong> otherwise
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // without mbstring the class-internal pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3372
3373
    /**
     * Check if the string is UTF-16.
     *
     * For each byte order the input is converted to UTF-8 and back again. If
     * that round trip is stable, the byte order is scored by counting how many
     * of the distinct chars of the converted text are found (strict in_array())
     * in the result of UTF8::count_chars() for the input. The byte order with
     * the higher score wins; equal scores mean "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>If true, the input must first be reported as binary by
     *                                     UTF8::is_binary(), otherwise false is returned.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $scores = [];
        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $scores[$candidate] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // only a conversion that survives the round trip is scored
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys (the chars) are needed. Iterating them by value
            // replaces the former by-reference loop over a function result.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$scores[$candidate];
                }
            }
        }

        $maybeUTF16LE = $scores['UTF-16LE'];
        $maybeUTF16BE = $scores['UTF-16BE'];

        if ($maybeUTF16LE === $maybeUTF16BE) {
            return false;
        }

        return $maybeUTF16LE > $maybeUTF16BE ? 1 : 2;
    }
3450
3451
    /**
     * Check if the string is UTF-32.
     *
     * For each byte order the input is converted to UTF-8 and back again. If
     * that round trip is stable, the byte order is scored by counting how many
     * of the distinct chars of the converted text are found (strict in_array())
     * in the result of UTF8::count_chars() for the input. The byte order with
     * the higher score wins; equal scores mean "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>If true, the input must first be reported as binary by
     *                                     UTF8::is_binary(), otherwise false is returned.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $scores = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $scores[$candidate] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // only a conversion that survives the round trip is scored
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys (the chars) are needed. Iterating them by value
            // replaces the former by-reference loop over a function result.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$scores[$candidate];
                }
            }
        }

        $maybeUTF32LE = $scores['UTF-32LE'];
        $maybeUTF32BE = $scores['UTF-32BE'];

        if ($maybeUTF32LE === $maybeUTF32BE) {
            return false;
        }

        return $maybeUTF32LE > $maybeUTF32BE ? 1 : 2;
    }
3528
3529
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * An array is valid if every element is valid. An empty string is valid.
     * A string that ends in the middle of a multi-octet sequence is invalid.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // the elements are only read, so they are iterated by value
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut relies on the /u modifier but is taken
        // when pcre_utf8_support() is NOT true - the condition looks inverted;
        // confirm against pcre_utf8_support() before changing it.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // cast once, so that the length and the byte access use the same value
        $str = (string) $str;
        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence: mUcs4 now contains the final
                // Unicode code point.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state means the input ended while continuation octets
        // were still expected, i.e. the last sequence is truncated.
        return $mState === 0;
    }
3684
3685
    /**
     * Decodes a JSON string.
     *
     * The input is passed through UTF8::filter() before it is handed to the
     * native \json_decode().
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, passed through to \json_decode().
     *                        </p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filteredJson, $assoc, $depth, $options);

        return $decoded;
    }
3735
3736
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through UTF8::filter() before it is handed to the
     * native \json_encode().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>),
     *                       passed through to \json_encode().
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filteredValue, $options, $depth);

        return $encoded;
    }
3785
3786
    /**
     * Checks whether JSON is available on the server.
     *
     * The check looks for the native function "json_decode".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3796
3797
    /**
     * Makes string's first char lowercase.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $firstChar = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            // the plain mb-function is sufficient as long as no language
            // specific or length preserving handling was requested
            if ($lang === null && $tryToKeepStringLength === false) {
                return \mb_strtolower($firstChar) . $remainder;
            }

            return self::strtolower(
                $firstChar,
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            ) . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $firstChar = (string) self::substr($str, 0, 1, $encoding);
        $remainder = (string) self::substr($str, 1, null, $encoding);

        return self::strtolower(
            $firstChar,
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        ) . $remainder;
    }
3853
3854
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowercased = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $lowercased;
    }
3876
3877
    /**
     * Lowercase the first char of all words in the string.
     *
     * The string is split via UTF8::str_to_words(), every part that is not
     * listed in $exceptions is passed to UTF8::lcfirst() and the parts are
     * joined again.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a truthiness check would also discard
        // the valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // write back via the key instead of iterating by reference, so that no
        // dangling reference is left behind after the loop
        foreach ($words as $index => $word) {
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
3923
3924
    /**
     * Alias of "UTF8::lcfirst()": every argument is handed over unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>Passed through to lcfirst().</p>
     * @param string      $encoding              [optional] <p>Passed through to lcfirst().</p>
     * @param bool        $cleanUtf8             [optional] <p>Passed through to lcfirst().</p>
     * @param string|null $lang                  [optional] <p>Passed through to lcfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Passed through to lcfirst().</p>
     *
     * @return string the result of lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCasedFirst = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowerCasedFirst;
    }
3946
3947
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * Uses "mb_ereg_replace()" when mbstring is flagged as supported, otherwise falls back to
     * "UTF8::regex_replace()".
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string means whitespace.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Only null and '' select the whitespace default; a truthiness check would also ignore the char list "0".
        if ($chars !== null && $chars !== '') {
            // Plain concatenation instead of the "${var}" interpolation, which is deprecated since PHP 8.2.
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = "^[\s]+";
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3975
3976
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array is concatenated into one string first; the code points come from
     * "UTF8::codepoints()" and the winner is converted back via "UTF8::chr()".
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, null when no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codePointList = self::codepoints($input, false);

        if (\count($codePointList) > 0) {
            return self::chr(\max($codePointList));
        }

        return null;
    }
3998
3999
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character sizes are taken from "UTF8::chr_size_list()".
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the largest byte length in the list, 0 when the list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $byteSizes = self::chr_size_list($str);

        return \count($byteSizes) === 0 ? 0 : (int) \max($byteSizes);
    }
4016
4017
    /**
     * Checks whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4027
4028
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array is concatenated into one string first; the code points come from
     * "UTF8::codepoints()" and the winner is converted back via "UTF8::chr()".
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, null when no code points were found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codePointList = self::codepoints($input, false);

        if (\count($codePointList) > 0) {
            return self::chr(\min($codePointList));
        }

        return null;
    }
4050
4051
    /**
     * Alias of "UTF8::normalize_encoding()": both arguments are handed over unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed the result of normalize_encoding()
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4067
4068
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty / "0" / "1" input gives $fallback, a handful of fixed spellings
     * are answered directly, then the per-request cache, then an exact hit in the list loaded
     * via getData('encodings'), and finally the upper-cased name is looked up in the alias table.
     * A name that matches nothing is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $RESOLVED_CACHE = [];

        // spellings that are answered without consulting the cache or the encoding list
        static $DIRECT_NAMES = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
        ];

        // alias (upper-cased, punctuation stripped) => canonical name
        static $EQUIVALENCES = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        $encoding = (string) $encoding;

        // '' and '0' are falsy; '1' is only a fallback, for non "strict_types" usage ...
        if (!$encoding || $encoding === '1') {
            return $fallback;
        }

        if (isset($DIRECT_NAMES[$encoding])) {
            return $DIRECT_NAMES[$encoding];
        }

        if (isset($RESOLVED_CACHE[$encoding])) {
            return $RESOLVED_CACHE[$encoding];
        }

        // the list of known encoding names is loaded on first use
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $RESOLVED_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $upperCased = \strtoupper($encoding);
        // drop everything except letters, digits and whitespace, e.g. "WINDOWS-1252" -> "WINDOWS1252"
        $lookupKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

        $normalized = isset($EQUIVALENCES[$lookupKey]) ? $EQUIVALENCES[$lookupKey] : $upperCased;

        // cached under the spelling the caller used
        $RESOLVED_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4211
4212
    /**
     * Standardize line endings to unix-like ones: "\r\n" and a lone "\r" both become "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // strtr() always prefers the longest key, so "\r\n" is turned into a single "\n"
        $lineEndingMap = [
            "\r\n" => "\n",
            "\r"   => "\n",
        ];

        return \strtr($str, $lineEndingMap);
    }
4223
4224
    /**
     * Normalize some MS Word special characters.
     *
     * Typographic quotes, guillemets, dashes and the ellipsis (given as UTF-8 byte sequences)
     * are replaced by their plain ASCII look-alikes.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \str_replace(\array_keys($replacements), \array_values($replacements), $str);
    }
4275
4276
    /**
     * Normalize the whitespace.
     *
     * Every entry of the whitespace table is replaced by a plain space; unless told otherwise,
     * the entries of the bidi-control table are removed beforehand.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        // one search list per value of $keepNonBreakingSpace, built on first use
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $whitespaceTable = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($whitespaceTable['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // the bidi controls are removed, not replaced by a space
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4317
4318
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-request cache, the table loaded via getData('ord'), "IntlChar::ord()"
     * (when flagged as supported) and finally a manual decode of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        // "UTF-8" and "CP850" are used as given, every other name is normalized first
        $isKnownName = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownName) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];

            return $CHAR_CACHE[$cacheKey];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                $CHAR_CACHE[$cacheKey] = $intlCode;

                return $CHAR_CACHE[$cacheKey];
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (up to four) byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $leadByte = $bytes ? $bytes[1] : 0;

        if ($leadByte >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($leadByte >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($leadByte >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($leadByte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte, or a lead byte without its continuation bytes
            $codePoint = $leadByte;
        }

        $CHAR_CACHE[$cacheKey] = $codePoint;

        return $codePoint;
    }
4396
4397
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()" this method never (re-)places variables in the current scope,
     *          the result is only available through the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            // without mbstring only the emptiness of $result tells whether anything was parsed
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4429
4430
    /**
     * Checks if the "u" modifier, which enables Unicode support in PCRE, is available.
     *
     * The check is done by matching an empty pattern that carries the modifier; warnings are silenced.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchResult = @\preg_match('//u', '');

        return (bool) $matchResult;
    }
4442
4443
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is turned into a code point: a string of decimal digits is cast to int,
     * a string of hexadecimal digits goes through "UTF8::hex_to_int()", and anything else is
     * treated as a character and passed to "UTF8::ord()". Note that a value consisting only of
     * hex digits (e.g. the single letter "a") takes the hexadecimal branch.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if ext-ctype is flagged as not installed
     *
     * @return string[] empty when a boundary is falsy or resolves to code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // shared boundary resolution for start and end
        $toCodePoint = static function ($boundary): int {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit((string) $boundary)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($boundary)) {
                return (int) self::hex_to_int($boundary);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        $chars = [];
        foreach (\range($start, $end) as $codePoint) {
            $chars[] = (string) self::chr($codePoint);
        }

        return $chars;
    }
4494
4495
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * One decode pass is: to_utf8() -> html_entity_decode() -> rawurldecode() -> fix_simple_utf8().
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: repeat the decode pass until the string stops changing,
     *                             false: run the decode pass exactly once.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // non-standard "%uXXXX" escapes are rewritten into numeric html entities,
        // so that the entity decoding below can resolve them
        $pattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // The pass always runs at least once: previously "$multi_decode === false" skipped it
        // completely and the input came back undecoded.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
4547
4548
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped into the delimiter and always gets the "u" modifier,
     * followed by the given options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used; 'msr' is treated as 'ms'.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty (falsy) delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4581
4582
    /**
     * Alias of "UTF8::remove_bom()": the argument is handed over unchanged.
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str
     *
     * @return string the result of remove_bom()
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4597
4598
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table (BOM => byte length) is checked once, in table order;
     * an entry that matches the current start of the string is cut off.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingLength = \strlen($str);

        foreach (self::$BOM as $bom => $bomLength) {
            // only a match at the very beginning counts
            if (\strpos($str, $bom, 0) !== 0) {
                continue;
            }

            $withoutBom = \substr($str, $bomLength, $remainingLength);
            if ($withoutBom === false) {
                return '';
            }

            $remainingLength -= (int) $bomLength;
            $str = (string) $withoutBom;
        }

        return $str;
    }
4627
4628
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what is collapsed into a single occurrence,
     * e.g. 'a  b   c' with the default ' ' becomes 'a b c'.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String (or list of strings) to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // anything that is neither a string nor an array leaves the input untouched
        if (\is_array($what) === false) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($what as $item) {
            // an empty needle cannot have duplicates
            if ($item === '') {
                continue;
            }

            // The callback returns the needle literally. Passing it as a replacement string would
            // expand "$0" / "\0" style sequences inside the needle as back-references.
            $str = (string) \preg_replace_callback(
                '/(?:' . \preg_quote($item, '/') . ')+/',
                static function () use ($item): string {
                    return $item;
                },
                $str
            );
        }

        return $str;
    }
4651
4652
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. '&lt;b&gt;&lt;i&gt;'.
     *                              Default: '' (strip every tag)</p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4666
4667
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * "\r\n" counts as ONE break, so it is replaced by a single $replacement. Only real "br" tags
     * (any case, with or without attributes / closing slash) are matched, tags that merely start
     * with "br" (e.g. "<brand>") are kept.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // The former pattern started with a stray "/": it swallowed a slash in front of "\r\n"
        // and made a plain "\r\n" match twice (once as "\r", once as "\n").
        return (string) \preg_replace("#\r\n|\r|\n|<br\\b[^>]*>#i", $replacement, $str);
    }
4679
4680
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * Removed are all control characters except newline (dec 10), carriage return (dec 13) and
     * horizontal tab (dec 09). The replacement is repeated until nothing matches any more, so
     * sequences that only appear after a removal (e.g. "%%0b0b") are caught as well.
     *
     * @param string $str
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00" - "%1f") of these characters.</p>
     * @param string $replacement [optional] <p>Inserted in place of every removed sequence.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        if ($url_encoded) {
            // The hex digits of a percent-encoding are case-insensitive ("%0B" === "%0b"),
            // hence the "i" modifier; without it the upper-case spelling slipped through.
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4713
4714
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix is detected with a byte-wise comparison; the cut itself is done in characters
     * of the given encoding.
     *
     * @param string $str
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a truthiness check would also ignore the prefix "0".
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4745
4746
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix test is a byte-wise comparison; the cut itself is done in
     * characters of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a truthiness check would treat the
        // suffix "0" as empty and never remove it.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4778
4779
    /**
     * Replaces every occurrence of $search in $str with $replacement.
     *
     * @param string $str           <p>The string to work on.</p>
     * @param string $search        <p>The text to look for.</p>
     * @param string $replacement   <p>The text to put in its place.</p>
     * @param bool   $caseSensitive [optional] <p>Set to false for a case-insensitive search. Default: true</p>
     *
     * @return string the string after all replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // case-insensitive replacements go through the class' own helper
        if (!$caseSensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4801
4802
    /**
     * Replaces every occurrence of each element of $search in $str.
     *
     * @param string       $str           <p>The string to work on.</p>
     * @param array        $search        <p>The texts to look for.</p>
     * @param array|string $replacement   <p>The replacement(s), passed on unchanged to the replace function.</p>
     * @param bool         $caseSensitive [optional] <p>Set to false for a case-insensitive search. Default: true</p>
     *
     * @return string the string after all replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        // case-insensitive replacements go through the class' own helper
        if (!$caseSensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4824
4825
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement; an empty string removes the characters.</p>
     * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 byte sequences.</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // mb_substitute_character() accepts a code point or a keyword such
            // as "none", never an arbitrary string. Invalid sequences are
            // therefore either dropped ("none") or turned into U+FFFD, which
            // the str_replace() below maps to the requested replacement.
            $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($save);
        }

        // "\xEF\xBF\xBD" is the UTF-8 encoding of U+FFFD
        return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
    }
4871
4872
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or '' strips whitespace.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Test for null / '' explicitly: a truthiness check would ignore the
        // character list "0" and strip whitespace instead.
        if ($chars !== null && $chars !== '') {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4900
4901
    /**
     * WARNING: Prints the detected native UTF-8 support (libs) as HTML, e.g. for debugging.
     */
    public static function showSupport()
    {
        // one "key - value" row per entry of the support table
        $rows = '';
        foreach (self::$SUPPORT as $name => $state) {
            $rows .= $name . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '<pre>' . $rows . '</pre>';
    }
4913
4914
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity; an empty string for empty input
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // ASCII input is handed back untouched when the caller asked for it
        $returnUnchanged = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($returnUnchanged) {
            return $char;
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4939
4940
    /**
     * Replaces every run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string the converted string; $str unchanged when $tabLength is not positive
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // A non-positive length cannot describe a tab, and a negative one
        // would make str_repeat() fail, so there is nothing to replace.
        if ($tabLength <= 0) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
    }
4958
4959
    /**
     * Convert a string to an array of Unicode characters.
     *
     * An array input is processed element by element (recursively) and
     * returned with its keys intact.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; empty for $length <= 0 or empty input.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as &$v) {
                $v = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }
            // drop the reference so later writes cannot hit the last element
            unset($v);

            return $str;
        }

        // init
        $str = (string) $str;
        $ret = [];

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);

            if ($iMax <= 127) {
                // short strings: one mb_substr() call per character
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: walk the string with the mb regex search
                /** @noinspection PhpComposerExtensionStubsInspection */
                \mb_ereg_search_init($str, '.', 'm');
                /** @noinspection PhpComposerExtensionStubsInspection */
                $r = \mb_ereg_search();
                if ($r) {
                    /** @noinspection PhpComposerExtensionStubsInspection */
                    $r = \mb_ereg_search_getregs(); // get first result
                    do {
                        $ret[] = $r[0];
                        /** @noinspection PhpComposerExtensionStubsInspection */
                        $r = \mb_ereg_search_regs(); // get next results
                    } while ($r);
                }
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: decode the UTF-8 lead / continuation bytes by hand;
            // sequences that do not validate are skipped

            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1 byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // group the single characters and glue each group together again
            $ret = \array_chunk($ret, $length);

            return \array_map(
                // by value: array_map() hands over values, not references
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
5111
5112
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the plain "mb_" functions are only used when no language specific
        // handling was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        // shared upper-casing step for both replacement passes below
        $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        $str = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // pass 1: drop dashes / underscores / whitespace and upper-case the
        // character that follows them (if there is one)
        $str = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return isset($match[1]) ? $toUpper($match[1], false) : '';
            },
            $str
        );

        // pass 2: upper-case the character that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $str
        );
    }
5197
5198
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        // collapse the whitespace first, then run the helper once for the
        // space-separated parts and once for the dash-separated parts
        $collapsed = self::collapse_whitespace($str);
        $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($bySpace, '-');
    }
5216
5217
    /**
     * Tells whether $haystack contains $needle. The search is case-sensitive
     * unless $caseSensitive is set to false.
     *
     * @param string $haystack      <p>The string to search in.</p>
     * @param string $needle        <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Set to false for a case-insensitive search. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // byte-wise search when case matters, multibyte search otherwise
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5239
5240
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle always
     * yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains every needle
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare against '' so that the needle "0" is still searched for
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail; otherwise keep checking
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5275
5276
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles list
     * always yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one needle
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare against '' so that the needle "0" is still searched for
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // the first hit settles it; otherwise try the next needle
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5312
5313
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // dasherizing is delimiting with a dash
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5327
5328
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters A-Z (with the
     * exception of the first character of the string), and in place of spaces,
     * dashes, and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $hasMbString = self::$SUPPORT['mbstring'] === true;
        $str = \trim($str);

        // step 1: mark every inner uppercase letter with a leading dash
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\B([A-Z])', '-\1', $str);
        } else {
            $str = (string) \preg_replace('/\B([A-Z])/u', '-\1', $str);
        }

        // step 2: lowercase, via the plain "mb_" function when no special
        // handling was requested
        $plainLowercase = $lang === null
                          && $tryToKeepStringLength === false
                          && $encoding === 'UTF-8';
        if ($plainLowercase) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // step 3: turn every run of dashes, underscores and whitespace into
        // the delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[-_\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5379
5380
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary (UTF-16 / UTF-32), ASCII, UTF-8,
     * "mb_detect_encoding()" and finally an "iconv()" round-trip per known
     * encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            $utf16Type = self::is_utf16($str, false);
            if ($utf16Type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16Type === 2) {
                return 'UTF-16BE';
            }

            $utf32Type = self::is_utf32($str, false);
            if ($utf32Type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32Type === 2) {
                return 'UTF-32BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()": the first encoding that survives a round-trip unchanged wins
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5501
5502
    /**
     * Tells whether $haystack ends with $needle (byte-wise, case-sensitive).
     *
     * @param string $haystack <p>The string to look at.</p>
     * @param string $needle   <p>The expected ending.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // cut off as many trailing bytes as the needle is long and compare
        $needleLength = \strlen($needle);
        $tail = \substr($haystack, -$needleLength);

        return $tail === $needle;
    }
5514
5515
    /**
     * Tells whether the string ends with at least one of $substrings.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The string to look at.</p>
     * @param string[] $substrings <p>The possible endings.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply never enters the loop
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5539
5540
    /**
     * Makes sure the string begins with $substring, prepending it when it is
     * missing.
     *
     * @param string $str       <p>The string to look at.</p>
     * @param string $substring <p>The required beginning.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $alreadyPrefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5561
5562
    /**
     * Makes sure the string ends with $substring, appending it when it is
     * missing.
     *
     * @param string $str       <p>The string to look at.</p>
     * @param string $substring <p>The required ending.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // nothing to do when the (non-empty) string already carries the
        // (non-empty) ending
        if (
            $str !== ''
            &&
            $substring !== ''
            &&
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return $str;
        }

        return $str . $substring;
    }
5584
5585
    /**
     * Turns an identifier-like string into a human readable one.
     *
     * Every "_id" is removed, every remaining underscore becomes a space, the
     * result is trimmed and finally handed to self::ucfirst().
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // Same order as before: drop "_id" first, then convert the underscores that are left.
        $withoutIdMarker = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $withoutIdMarker);

        return self::ucfirst(\trim($spaced));
    }
5609
5610
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * An empty $haystack or an empty $needle never matches. The tail of the
     * haystack (as many bytes as the needle is long) is compared with
     * self::strcasecmp().
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5626
5627
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - every candidate is checked with self::str_iends_with()
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the given $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // Iterate by value: nothing is modified here, a by-reference loop would only
        // force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5651
5652
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper: all arguments are forwarded unchanged to self::stripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5678
5679
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper: all arguments are forwarded unchanged to self::strripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5706
5707
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper: all arguments are forwarded unchanged to self::strpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5733
5734
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper: all arguments are forwarded unchanged to self::strrpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5761
5762
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * When $index is greater than the length of the string, the string is
     * returned unchanged. For 'UTF-8' the native "mb_" functions are used
     * directly, every other encoding is normalized first and handled by the
     * class' own strlen() / substr().
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5801
5802
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search value is quoted and turned into a "/…/ui" pattern for
     * preg_replace(); an empty search value is mapped to a pattern that never
     * matches. Replacement strings are inserted literally: "$" and "\" are
     * escaped, so that e.g. "$1" is not expanded as a regex back-reference.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s), taken literally.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements (the result of preg_replace())
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Build the pattern list in a fresh array (same keys, same order), no reference juggling needed.
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/' // can never match, an empty needle replaces nothing
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // preg_replace() interprets "$n" / "\n" inside the replacement, str_replace() semantics
        // require the text to be inserted as it is.
        $escapeLiteral = static function ($value) {
            return \is_string($value) ? \addcslashes($value, '\\$') : $value;
        };
        $replacement = \is_array($replace)
            ? \array_map($escapeLiteral, $replace)
            : $escapeLiteral($replace);

        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5846
5847
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - an empty $search results in $str followed by $replacement
     * - the prefix check is done with the native, byte-oriented stripos()
     *   NOTE(review): multibyte characters are presumably not case-folded here - confirm
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Covers every empty-$search case, including an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        $startsWithSearch = \stripos($str, $search) === 0;
        if (!$startsWithSearch) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5878
5879
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - an empty $search results in $str followed by $replacement
     * - a $search that is longer than $str can never match, $str is returned as it is
     * - the suffix check is done with the native, byte-oriented stripos()
     *   NOTE(review): multibyte characters are presumably not case-folded here - confirm
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Covers every empty-$search case, including an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        // The only position at which $search may match.
        $offset = \strlen($str) - \strlen($search);

        // Bail out before stripos() gets an offset that is not contained in the
        // string (warning on PHP 7, ValueError on PHP 8).
        if ($offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $offset) === false) {
            return $str;
        }

        return \substr($str, 0, $offset) . $replacement;
    }
5910
5911
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * An empty $haystack or an empty $needle never matches; otherwise the
     * needle has to be found by self::stripos() at index 0.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
5927
5928
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - every candidate is checked with self::str_istarts_with()
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the given $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: nothing is modified here, a by-reference loop would only
        // force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5956
5957
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * The separator is located with self::str_iindex_first(); an empty string
     * is returned when $str or $separator is empty or the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the separator up in the requested encoding, so that the offset is
        // measured in the same units as the substr() calls below.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5991
5992
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * The separator is located with self::strripos(); an empty string is
     * returned when $str or $separator is empty or the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the separator up in the requested encoding, so that the offset is
        // measured in the same units as the substr() calls below.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6026
6027
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * The separator is located with self::str_iindex_first(); an empty string
     * is returned when $str or $separator is empty or the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the separator up in the requested encoding, so that the offset is
        // measured in the same units as the substr() calls below.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6053
6054
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * For 'UTF-8' the lookup is done with mb_strripos() / mb_substr(), for any
     * other encoding with self::strripos() / self::substr(). An empty string is
     * returned when $str or $separator is empty or the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $offset = $isUtf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        return $isUtf8
            ? (string) \mb_substr($str, 0, $offset)
            : (string) self::substr($str, 0, $offset, $encoding);
    }
6085
6086
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The work is done by self::stristr(); its "false" result, as well as an
     * empty $str or $needle, is turned into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6122
6123
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The work is done by self::strrichr(); its "false" result, as well as an
     * empty $str or $needle, is turned into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6154
6155
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string is returned for an empty $str or when $n is not positive.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalizedEncoding);
        }

        return (string) \mb_substr($str, -$n);
    }
6178
6179
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned as it is. Otherwise
     * it is cut so that the kept part plus $strAddOn is $length characters
     * long. If $strAddOn alone is at least $length characters long, nothing of
     * $str is kept and only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length to substr: it would cut from the end of
            // the string and could return more than $length characters.
            $keep = \max(0, $length - (int) self::strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Same clamp as in the UTF-8 branch above.
        $keep = \max(0, $length - (int) self::strlen($strAddOn));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6216
6217
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * - a string that already fits into $length is returned as it is
     * - if the character at position "$length - 1" is a space, the string is cut right before it
     * - otherwise the string is cut to $length characters and everything after
     *   the last remaining space is dropped; without any space the first
     *   "$length - 1" characters are kept
     * - $strAddOn is appended to every shortened result
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            $cut = \mb_substr($str, 0, $length);

            // Everything in front of the last space, i.e. the text without its last (partial) word.
            $lastSpace = \strrpos($cut, ' ');
            $withoutLastWord = $lastSpace === false ? '' : \substr($cut, 0, $lastSpace);

            if ($withoutLastWord === '') {
                return ((string) \mb_substr($cut, 0, $length - 1)) . $strAddOn;
            }

            return $withoutLastWord . $strAddOn;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        $cut = self::substr($str, 0, $length, $encoding);
        if ($cut === false) {
            return $strAddOn;
        }

        // Everything in front of the last space, i.e. the text without its last (partial) word.
        $lastSpace = \strrpos($cut, ' ');
        $withoutLastWord = $lastSpace === false ? '' : \substr($cut, 0, $lastSpace);

        if ($withoutLastWord === '') {
            return ((string) self::substr($cut, 0, $length - 1, $encoding)) . $strAddOn;
        }

        return $withoutLastWord . $strAddOn;
    }
6281
6282
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start, the
     * comparison stops at the first difference or at the end of the shorter string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(\mb_strlen($str), \mb_strlen($otherStr));

            $i = 0;
            while ($i < $limit) {
                $char = \mb_substr($str, $i, 1);
                if ($char === false || $char !== \mb_substr($otherStr, $i, 1)) {
                    break;
                }

                $prefix .= $char;
                ++$i;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str, $encoding),
            self::strlen($otherStr, $encoding)
        );

        $i = 0;
        while ($i < $limit) {
            $char = self::substr($str, $i, 1, $encoding);
            if ($char === false || $char !== self::substr($otherStr, $i, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$i;
        }

        return $prefix;
    }
6340
6341
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * Only two rows of the table are kept in memory, and every character is
     * extracted once per row / once for $otherStr instead of once per table cell.
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with its $str being the longest common substring
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Evaluated after the normalization above, which may have resolved an alias to 'UTF-8'.
        $useMbstring = $encoding === 'UTF-8';

        // Split $otherStr once, it is walked completely for every character of $str.
        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[$j] = $useMbstring
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the longest common run found so far
        $end = 0; // 1-based index in $str at which that run ends
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $useMbstring
                ? \mb_substr($str, $i - 1, 1)
                : self::substr($str, $i - 1, 1, $encoding);

            $currentRow = [0];
            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    // Extend the run that ended at the previous character of both strings.
                    $run = $previousRow[$j - 1] + 1;
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($useMbstring) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6425
6426
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are compared character by character from the end, the
     * comparison stops at the first difference or at the start of the shorter string.
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str, $encoding),
                \mb_strlen($otherStr, $encoding)
            );

            $i = 1;
            while ($i <= $limit) {
                $char = \mb_substr($str, -$i, 1);
                if ($char === false || $char !== \mb_substr($otherStr, -$i, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$i;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str, $encoding),
            self::strlen($otherStr, $encoding)
        );

        $i = 1;
        while ($i <= $limit) {
            $char = self::substr($str, -$i, 1, $encoding);
            if ($char === false || $char !== self::substr($otherStr, -$i, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$i;
        }

        return $suffix;
    }
6487
6488
    /**
     * Tests $str against a regular expression.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u" (UTF-8)
     * modifier, so it must be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @return bool true if the pattern matches, false if it does not match or matching fails
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
6500
6501
    /**
     * Tells whether a character exists at the given offset.
     *
     * Non-negative offsets count from the start of the string (0 is the first
     * character); negative offsets count from the end (-1 is the last character).
     * Implements part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // a negative offset of -n needs at least n characters,
        // a non-negative offset n needs at least n + 1 characters
        return $offset < 0
            ? $charCount >= \abs($offset)
            : $charCount > $offset;
    }
6523
6524
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * The bounds check is delegated to str_offset_exists() so that the string
     * length is measured in the same $encoding that is later used to fetch the
     * character.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6553
6554
    /**
     * Pad a string to a given character length with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string is
     * empty, or when the input already has more than $pad_length characters.
     * For STR_PAD_BOTH the left side receives floor(missing / 2) characters and
     * the right side ceil(missing / 2).
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of the string aliases
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        if ($encoding === 'UTF-8') {
            // number of characters still missing; negative means the input is already too long
            $gap = $pad_length - (int) \mb_strlen($str);
            if ($gap < 0) {
                return $str;
            }

            if ($pad_type === \STR_PAD_BOTH) {
                $leftWidth = (int) \floor($gap / 2);
                $rightWidth = (int) \ceil($gap / 2);

                return (string) \mb_substr(\str_repeat($pad_string, $leftWidth), 0, $leftWidth)
                    . $str
                    . (string) \mb_substr(\str_repeat($pad_string, $rightWidth), 0, $rightWidth);
            }

            // left and right padding build the same filler, only its position differs;
            // any integer pad type other than LEFT / BOTH pads on the right
            $filler = (string) \mb_substr(
                \str_repeat($pad_string, (int) \ceil($gap / (int) \mb_strlen($pad_string))),
                0,
                $gap
            );

            return $pad_type === \STR_PAD_LEFT ? $filler . $str : $str . $filler;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $gap = $pad_length - (int) self::strlen($str, $encoding);
        if ($gap < 0) {
            return $str;
        }

        if ($pad_type === \STR_PAD_BOTH) {
            $leftWidth = (int) \floor($gap / 2);
            $rightWidth = (int) \ceil($gap / 2);

            return (string) self::substr(\str_repeat($pad_string, $leftWidth), 0, $leftWidth, $encoding)
                . $str
                . (string) self::substr(\str_repeat($pad_string, $rightWidth), 0, $rightWidth, $encoding);
        }

        $filler = (string) self::substr(
            \str_repeat($pad_string, (int) \ceil($gap / (int) self::strlen($pad_string, $encoding))),
            0,
            $gap,
            $encoding
        );

        return $pad_type === \STR_PAD_LEFT ? $filler . $str : $str . $filler;
    }
6716
6717
    /**
     * Pads both sides of the string until it reaches the given length.
     *
     * Shortcut for str_pad() with a pad type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // all the work happens in the generic padder
        return self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);
    }
6736
6737
    /**
     * Pads the beginning of the string until it reaches the given length.
     *
     * Shortcut for str_pad() with a pad type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // all the work happens in the generic padder
        return self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);
    }
6756
6757
    /**
     * Pads the end of the string until it reaches the given length.
     *
     * Shortcut for str_pad() with a pad type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // all the work happens in the generic padder
        return self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);
    }
6776
6777
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() first; the filtered result is
     * what gets repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6801
6802
    /**
     * Replace all occurrences of the search string with the replacement string.
     *
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // $count is forwarded by reference so the caller receives the number of replacements
        /** @psalm-suppress PossiblyNullArgument */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6840
6841
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * Only a leading occurrence is replaced; if $str does not start with $search
     * the input is returned unchanged. An empty $search never matches a prefix:
     * in that case $replacement is appended to $str.
     *
     * The prefix test uses strncmp(), which inspects only the first
     * strlen($search) bytes instead of scanning the whole string.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers an empty $str, where the result is just $replacement
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        if (\strncmp($str, $search, $searchLength) === 0) {
            // byte offsets are safe here: a full byte-wise prefix match ends on a character boundary
            return $replacement . (string) \substr($str, $searchLength);
        }

        return $str;
    }
6872
6873
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * Only a trailing occurrence is replaced; if $str does not end with $search
     * the input is returned unchanged. An empty $search results in $replacement
     * being appended to $str.
     *
     * A $search that is longer than $str can never match and is rejected up front,
     * so no out-of-range offset is ever handed to the string functions.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers an empty $str, where the result is just $replacement
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        if (
            $searchLength <= \strlen($str)
            &&
            \substr_compare($str, $search, -$searchLength) === 0
        ) {
            // cut the matched tail off (byte-wise, like the comparison) and attach the replacement
            return (string) \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6904
6905
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The subject is returned untouched when the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the first hit.</p>
     * @param string $subject <p>The string being searched.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $position = self::strpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        // swap exactly the matched span for the replacement
        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $position, $searchLength);
    }
6927
6928
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The subject is returned untouched when the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the last hit.</p>
     * @param string $subject <p>The string being searched.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(string $search, string $replace, string $subject): string
    {
        $position = self::strrpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        // swap exactly the matched span for the replacement
        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $position, $searchLength);
    }
6952
6953
    /**
     * Shuffles all the characters in the string.
     *
     * A list of character positions is shuffled and the result is assembled by
     * picking the characters in that order.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $result = '';

        if ($encoding === 'UTF-8') {
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($positions);

            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }

            return $result;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
6999
7000
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * When $end is given, a negative $start is resolved against the string
     * length (clamped at 0) and a negative $end is resolved the same way before
     * the slice length is computed. An $end at or before $start yields an empty
     * string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring; the value comes straight from the underlying
     *                      substr call, which may report <b>FALSE</b> when $start lies beyond the string.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $strLength = $isUtf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // no end given: take everything from $start on (substr handles a negative $start itself)
            $length = $strLength;
        } else {
            // turn both indexes into absolute positions so that "end - start" is meaningful
            if ($start < 0) {
                $start = (int) \max(0, $strLength + $start);
            }
            if ($end < 0) {
                $end += $strLength;
            }

            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        }

        return $isUtf8
            ? \mb_substr($str, $start, $length)
            : self::substr($str, $start, $length, $encoding);
    }
7049
7050
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every ASCII digit is
     * wrapped in underscores and every ASCII uppercase letter is lower-cased and
     * prefixed with an underscore; whitespace runs become "_"; repeated
     * underscores are collapsed and leading / trailing underscores are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // digits and uppercase letters only: inside a character class a "|" is a
            // literal pipe, not an alternation, so it must not be listed here
            '/([\dA-Z])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // a digit survives the int round-trip unchanged
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/',        // convert spaces to "_"
                '/^\s+|\s+$/',  // trim leading & trailing spaces
                '/_+/',         // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7114
7115
    /**
     * Sort all characters according to code points.
     *
     * The string is turned into a code point list via self::codepoints(), optionally
     * de-duplicated, sorted ascending or descending, and handed to self::string()
     * to build the result.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves a single entry per distinct value
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7140
7141
    /**
     * Alias for "UTF8::str_split()"; all arguments are forwarded unchanged.
     *
     * @see UTF8::str_split()
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        return self::str_split($str, $length, $cleanUtf8);
    }
7159
7160
    /**
     * Splits the string with the provided pattern and returns the pieces as an
     * array of strings. An optional integer $limit will truncate the results.
     *
     * A $limit of 0 yields an empty array; an empty $pattern yields the whole
     * string as the only element.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $pieces = \mb_split($pattern, $str);

            if ($limit < 0) {
                return $pieces;
            }

            // keep only the first $limit pieces; the counter is consumed by reference
            return \array_filter(
                $pieces,
                static function () use (&$limit): bool {
                    return --$limit >= 0;
                }
            );
        }

        // ask for one piece more than wanted, so the unsplit remainder can be dropped afterwards
        $limit = $limit > 0 ? $limit + 1 : -1;

        // NOTE(review): this fallback quotes $pattern and therefore splits on it literally,
        // while the mb_split() path above treats it as a regex - confirm which is intended
        $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($pieces === false) {
            return [];
        }

        if ($limit > 0 && \count($pieces) === $limit) {
            \array_pop($pieces);
        }

        return $pieces;
    }
7214
7215
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is case-sensitive and byte-wise. Only the first
     * strlen($needle) bytes of $haystack are inspected, so a long haystack is
     * not scanned in full. An empty $needle is treated as a prefix of every
     * string, independent of the PHP version in use.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7227
7228
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // stop at the first candidate that is a prefix of $str
        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7256
7257
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // fast path: call the mbstring functions directly for UTF-8
        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper (like the sibling "*_separator" helpers do)
        // instead of handing the caller-supplied encoding to mb_substr() directly
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7296
7297
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // position of the last separator (mbstring directly for UTF-8, class wrapper otherwise)
        $separatorPos = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separatorPos === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $separatorPos + (int) \mb_strlen($separator));
        }

        $startAfterSeparator = $separatorPos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $startAfterSeparator, null, $encoding);
    }
7336
7337
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // position of the first separator (mbstring directly for UTF-8, class wrapper otherwise)
        $separatorPos = $isUtf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($separatorPos === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $separatorPos);
        }

        return (string) self::substr($str, 0, $separatorPos, $encoding);
    }
7380
7381
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // fast path: call the mbstring functions directly for UTF-8
        if ($encoding === 'UTF-8') {
            $separatorPos = \mb_strrpos($str, $separator);

            return $separatorPos === false
                ? ''
                : (string) \mb_substr($str, 0, $separatorPos);
        }

        $separatorPos = self::strrpos($str, $separator, 0, $encoding);
        if ($separatorPos === false) {
            return '';
        }

        // the encoding name is normalized only for the final substr() call
        return (string) self::substr(
            $str,
            0,
            $separatorPos,
            self::normalize_encoding($encoding, 'UTF-8')
        );
    }
7423
7424
    /**
     * Gets the part of the string starting at the first occurrence of the "$needle",
     * or the part before it when "$beforeNeedle" is true.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle cannot be found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mbstring directly for UTF-8, class wrapper for every other encoding
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7468
7469
    /**
     * Gets the part of the string starting at the last occurrence of the "$needle",
     * or the part before it when "$beforeNeedle" is true.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle cannot be found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mbstring directly for UTF-8, class wrapper for every other encoding
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7513
7514
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7526
7527
    /**
     * Returns a (by default trimmed) string in which every run of non-whitespace
     * characters gets an upper-case first character and a lower-case rest.
     * Words listed in $ignore are left exactly as they are.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        $needsNormalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // without language rules or length constraints the plain mbstring case functions are used
        $plainMbCase = $lang === null && $tryToKeepStringLength === false;

        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $plainMbCase, $encoding): string {
            $word = $match[0];

            // ignored words are returned untouched
            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($plainMbCase !== true) {
                $lowered = self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );

                return self::ucfirst(
                    $lowered,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            if ($encoding === 'UTF-8') {
                $firstChar = \mb_substr($word, 0, 1);
                $rest = \mb_substr($word, 1);

                return \mb_strtoupper($firstChar) . \mb_strtolower($rest);
            }

            $firstChar = \mb_substr($word, 0, 1, $encoding);
            $rest = \mb_substr($word, 1, null, $encoding);

            return \mb_strtoupper($firstChar, $encoding) . \mb_strtolower($rest, $encoding);
        };

        return (string) \preg_replace_callback('/([\S]+)/u', $titleizeWord, $str);
    }
7602
7603
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are appended to the built-in list of small
     * words and inserted into the regular expressions as they are.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // a string without any lower-case character is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // Groups that did not take part in the match are empty strings.
                // They are compared against '' (not by truthiness), so that a
                // matched "0" is not mistaken for "no match" and dropped.

                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2] !== '') {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3] !== '') {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4] !== '') {
                    // capitalize word w/o internal caps
                    $str .= self::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below contains "…" although its inline
        // comment talks about a hyphen - the pattern is kept as it is; confirm
        // against the script this was adapted from before changing it.
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7772
7773
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big-endian bit sequence and
     * the leading zero bits are removed, e.g. "0" (0x30) becomes "110000".
     * An empty string (and a string that consists of NUL bytes only) results
     * in "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        // Convert byte by byte: base_convert() works on a number internally and
        // loses precision for large values, i.e. for strings longer than a few bytes.
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // keep the format of a plain number conversion: no leading zeros, "0" for zero
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7786
7787
    /**
     * Split a string into an array of lines.
     *
     * One or two consecutive "\r" / "\n" characters are treated as a single
     * line break. The result can optionally be filtered via
     * UTF8::reduce_string_array().
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "nothing to split" and for a failed split
        $fallback = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split('[\r\n]{1,2}', $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        // filtering is needed only if at least one of the filters is active
        if ($removeShortValues !== null || $removeEmptyValues !== false) {
            return self::reduce_string_array(
                $lines,
                $removeEmptyValues,
                $removeShortValues
            );
        }

        return $lines;
    }
7825
7826
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the unfiltered
     * result contains the words as well as the text between them. The result
     * can optionally be filtered via UTF8::reduce_string_array().
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        $charList = self::rxClass($charList, '\pL');

        $return = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        if (
            $removeShortValues === null
            &&
            $removeEmptyValues === false
        ) {
            return $return;
        }

        $tmpReturn = self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );

        // cast every value to string; assign by key instead of iterating by
        // reference, so that no dangling reference is left behind
        foreach ($tmpReturn as $key => $item) {
            $tmpReturn[$key] = (string) $item;
        }

        return $tmpReturn;
    }
7873
7874
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are passed through unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7889
7890
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If nothing of the string fits (the substring is at least as long as
     * $length, or $length is not positive), only $substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // fast path: call the mbstring functions directly for UTF-8
        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // a negative length would make mb_substr() cut from the end of the
            // string, so the result could even be longer than the input
            if ($length <= 0) {
                return $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same guard as in the UTF-8 path
        if ($length <= 0) {
            return $substring;
        }

        return ((string) self::substr(
            $str,
            0,
            $length,
            $encoding
        )) . $substring;
    }
7945
7946
    /**
     * Truncates the string to a given length without splitting words. If
     * $substring is provided, and truncating occurs, the string is further
     * truncated so that the substring may be appended without exceeding the
     * desired length.
     *
     * Only $substring is returned if $str is empty, if $length is not positive
     * or if no room is left for the string once the substring is accounted for.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            // fast path: call the mbstring functions directly
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            $head = \mb_substr($str, 0, $length);
            if ($head === false) {
                return '';
            }

            // a space right behind the cut means that no word was split
            $nextSpacePos = \mb_strpos($str, ' ', $length - 1);
            if ($nextSpacePos === $length) {
                return $head . $substring;
            }

            // otherwise fall back to the last space within the truncated part
            $lastSpacePos = \mb_strrpos($head, ' ', 0);
            $cutAtLastSpace = $lastSpacePos !== false
                              ||
                              ($nextSpacePos !== false && $ignoreDoNotSplitWordsForOneWord === false);
            if ($cutAtLastSpace) {
                $head = (string) \mb_substr($head, 0, (int) $lastSpacePos);
            }

            return $head . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // a space right behind the cut means that no word was split
        $nextSpacePos = self::strpos($str, ' ', $length - 1, $encoding);
        if ($nextSpacePos === $length) {
            return $head . $substring;
        }

        // otherwise fall back to the last space within the truncated part
        $lastSpacePos = self::strrpos($head, ' ', 0, $encoding);
        $cutAtLastSpace = $lastSpacePos !== false
                          ||
                          ($nextSpacePos !== false && $ignoreDoNotSplitWordsForOneWord === false);
        if ($cutAtLastSpace) {
            $head = (string) self::substr($head, 0, (int) $lastSpacePos, $encoding);
        }

        return $head . $substring;
    }
8040
8041
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is UTF8::str_delimit() with "_" as the delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8055
8056
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized via UTF8::str_camelize() first, afterwards its
     * first character is upper-cased via UTF8::ucfirst().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8078
8079
    /**
     * Alias for "UTF8::ucfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Forwarded to UTF8::ucfirst().</p>
     * @param string|null $lang                  [optional] <p>Forwarded to UTF8::ucfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Forwarded to UTF8::ucfirst().</p>
     *
     * @return string the result of UTF8::ucfirst()
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $upperFirst;
    }
8101
8102
    /**
     * Counts the words of a UTF-8 string, or returns the words themselves.
     *
     * The string is split via UTF8::str_to_words(); this method treats the
     * entries at odd indexes of that result as words and the entries at even
     * indexes as the text between them.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string, or the list of words (see $format)
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            for ($idx = 1; $idx < $partCount; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: words keyed by their character offset in the input
        if ($format === 2) {
            $wordsByOffset = [];
            $position = (int) self::strlen($parts[0]);
            for ($idx = 1; $idx < $partCount; $idx += 2) {
                $wordsByOffset[$position] = $parts[$idx];

                // skip the word itself plus the text that follows it
                $wordLength = (int) self::strlen($parts[$idx]);
                $gapLength = (int) self::strlen($parts[$idx + 1]);
                $position += $wordLength + $gapLength;
            }

            return $wordsByOffset;
        }

        // any other format: only the number of words
        return (int) (($partCount - 1) / 2);
    }
8139
8140
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are
     * case-folded via UTF8::strtocasefold() before they are compared.
     *
     * @see UTF8::strcmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8161
8162
    /**
     * Alias for "UTF8::strstr()": all arguments are forwarded unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Forwarded to UTF8::strstr().</p>
     * @param string $encoding      [optional] <p>Forwarded to UTF8::strstr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::strstr().</p>
     *
     * @return false|string the result of UTF8::strstr()
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $found;
    }
8184
8185
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings short-circuit to 0. Otherwise both strings are
     * normalized to Unicode NFD and then compared byte-wise, so that
     * canonically equivalent strings (e.g. precomposed vs. decomposed
     * accents) compare as equal.
     *
     * If the normalization fails for one of the strings (the Normalizer
     * returns false, e.g. for invalid UTF-8), the raw, un-normalized strings
     * are compared instead of passing "false" into \strcmp(), which would
     * raise a TypeError under strict types.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // normalization failed for at least one side: compare the raw bytes
        if ($normalized1 === false || $normalized2 === false) {
            return \strcmp($str1, $str2);
        }

        return \strcmp($normalized1, $normalized2);
    }
8203
8204
    /**
     * Find length of initial segment not matching mask.
     *
     * Returns the number of characters at the start of $str (or of the
     * sub-string selected via $offset / $length) that appear before the first
     * character contained in $charList.
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $charList <p>The characters to search for; if empty, the full length of $str is returned.</p>
     * @param int|null $offset   [optional] <p>Start of the sub-string to inspect.</p>
     * @param int|null $length   [optional] <p>Length of the sub-string to inspect.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int the length of the initial segment, 0 if the inspected string is empty
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // nothing to search for: the whole string is the "initial segment"
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        // cut the string down to the requested range first
        $hasRange = $offset !== null || $length !== null;
        if ($hasRange) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, $start, $length, $encoding);
            } elseif ($length === null) {
                $slice = \mb_substr($str, $start);
            } else {
                $slice = \mb_substr($str, $start, $length);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the list
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // no char from the list at all: the whole string counts
            return (int) self::strlen($str, $encoding);
        }

        $prefixLength = self::strlen($found[1], $encoding);

        return $prefixLength === false ? 0 : $prefixLength;
    }
8265
8266
    /**
     * Alias for "UTF8::stristr()": all arguments are forwarded unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Forwarded to UTF8::stristr().</p>
     * @param string $encoding      [optional] <p>Forwarded to UTF8::stristr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::stristr().</p>
     *
     * @return false|string the result of UTF8::stristr()
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $found;
    }
8288
8289
    /**
     * Create a UTF-8 string from code points.
     *
     * Every array entry is converted with UTF8::chr() and the results are
     * concatenated in array order.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * @see UTF8::chr()
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $toChar = [self::class, 'chr'];
        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
8311
8312
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The input is compared against every key of the static $BOM table
     * (BOM byte-sequence => byte-length); only the keys are needed here, so
     * the table is iterated by value and never touched by reference.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        foreach (\array_keys(self::$BOM) as $bomString) {
            // position 0 === the string starts with this BOM
            if (\strpos($str, (string) $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8332
8333
    /**
     * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string (via UTF8::clean())
     *                                    before the tags are stripped.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        return \strip_tags($input, $allowable_tags);
    }
8365
8366
    /**
     * Strip all whitespace characters.
     *
     * Every run of characters matching the "[[:space:]]" class (evaluated in
     * unicode mode) is removed. This includes tabs and newline characters, as
     * well as multibyte whitespace such as the thin space and ideographic
     * space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input without whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8383
8384
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * Strategy, in this order: mbstring, intl ("grapheme_stripos()", UTF-8 and
     * non-negative offset only), native "stripos()" for pure ASCII input and
     * finally case-folding both strings and delegating to UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $hasMbstring = self::$SUPPORT['mbstring'] === true;

        // fast path: mbstring + UTF-8 needs no encoding normalization
        if ($hasMbstring && $encoding === 'UTF-8') {
            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($hasMbstring) {
            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $canUseIntl = $encoding === 'UTF-8'
                      &&
                      $offset >= 0
                      &&
                      self::$SUPPORT['intl'] === true;
        if ($canUseIntl) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8459
8460
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case insensitive).
     *
     * Strategy, in this order: mbstring, intl ("grapheme_stristr()", UTF-8
     * only), native "stristr()" for pure ASCII input and finally a
     * case-insensitive regular expression.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if cleaning removed all of it.
        // A strict comparison is required: a loose "!$needle" check would
        // also treat the valid needle "0" as empty.
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // $match[1] lazily captures everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the captured prefix and return the rest, starting with the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8543
8544
    /**
     * Get the string length, not the byte-length!
     *
     * Strategy, in this order: mbstring, native "strlen()" for the "CP850" and
     * "ASCII" encodings, iconv, intl ("grapheme_strlen()", UTF-8 only), native
     * "strlen()" for pure ASCII input and finally counting the matches of a
     * unicode regular expression.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        $isWellKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isWellKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $noMultibyteBackend = self::$SUPPORT['mbstring'] === false
                              &&
                              self::$SUPPORT['iconv'] === false;
        if ($encoding !== 'UTF-8' && $noMultibyteBackend) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // one regex match per character
        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        // no match at all for a non-empty string is reported as failure
        return $charCount === 0 ? false : $charCount;
    }
8658
8659
    /**
     * Get string length in byte.
     *
     * When the "mbstring_func_overload" support flag is set, the length is
     * taken via "mb_strlen()" with the 8-bit "CP850" encoding instead of the
     * plain "strlen()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int the number of bytes
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
8679
8680
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both strings are
     * case-folded via UTF8::strtocasefold() and then handed to
     * UTF8::strnatcmp().
     *
     * @see UTF8::strnatcmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8701
8702
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp(); identical strings
     * short-circuit to 0, otherwise both strings are passed through
     * UTF8::strtonatfold() before the native "strnatcmp()" compares them.
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 . '' === $str2 . '') {
            return 0;
        }

        $natFolded1 = (string) self::strtonatfold($str1);
        $natFolded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($natFolded1, $natFolded2);
    }
8721
8722
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then handed
     * to UTF8::strncmp(); note that $encoding is only used for the case
     * folding and is not forwarded to UTF8::strncmp().
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     * @see  UTF8::strncmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8749
8750
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut down to their first $len characters and then
     * compared via UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     * @see  UTF8::strcmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $isWellKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isWellKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            // UTF-8: use "mb_substr()" directly
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head1, $head2);
    }
8785
8786
    /**
     * Search a string for any of a set of characters.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     *                      (also false if $haystack or $char_list is empty)
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // byte position of the matched text inside the haystack
        $bytePos = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $bytePos);
    }
8808
8809
    /**
     * Find position of first occurrence of string in a string.
     *
     * Strategy, in this order: mbstring, native "strpos()" for the "CP850" and
     * "ASCII" encodings, intl ("grapheme_strpos()", UTF-8 and non-negative
     * offset only), iconv (non-negative offset only), native "strpos()" for
     * pure ASCII input and finally a byte search on the sub-string that
     * starts at $offset.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if (\is_int($needle)) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Character index (counted from the start of the haystack) at which
        // the search begins. A negative offset counts from the end of the
        // haystack, so it has to be translated into that index; otherwise the
        // returned position would be relative to the cut-off tail only.
        $searchStart = $offset;
        if ($offset < 0) {
            $searchStart = (int) self::strlen($haystack, $encoding) + $offset;
            if ($searchStart < 0) {
                $searchStart = 0;
            }
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystackTail = (string) $haystackTmp;

        // byte position of the needle inside the tail
        $bytePos = \strpos($haystackTail, $needle);
        if ($bytePos === false) {
            return false;
        }

        if ($bytePos === 0) {
            return $searchStart;
        }

        // convert the byte position into a character position
        return $searchStart + (int) self::strlen(\substr($haystackTail, 0, $bytePos), $encoding);
    }
8956
8957
    /**
     * Find the position of the first occurrence of a string in another string (byte based).
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to look for in the haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack / needle is an empty string.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with the 8-bit charset "CP850"
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
8986
8987
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return the part of haystack from its beginning up to the
     *                              last occurrence of needle.<br>
     *                              <b>false</b>: return the part of haystack from the last occurrence of
     *                              needle up to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack, or false if needle is not found
     *                      (also false if haystack or needle is an empty string)
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isSingleByteCharset = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $isSingleByteCharset) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv, or via vanilla php if iconv is not available:
        // both variants only search for the first character of the needle
        //

        $firstNeedleChar = self::substr($needle, 0, 1, $encoding);
        if ($firstNeedleChar === false) {
            return false;
        }
        $needle = (string) $firstNeedleChar;

        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($lastPos === false) {
            return false;
        }

        if (!$before_needle) {
            return self::substr($haystack, $lastPos, null, $encoding);
        }

        return self::substr($haystack, 0, $lastPos, $encoding);
    }
9109
9110
    /**
     * Reverses characters order in the string.
     *
     * For UTF-8 input the string is split into its characters in a single pass;
     * if that split fails (e.g. because the input is not valid UTF-8), the string
     * is reversed character by character via "mb_substr()" instead.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // init
        $reversed = '';

        if ($encoding === 'UTF-8') {
            // fast path: one linear split instead of one "mb_substr()" call per character,
            // "preg_split()" with the "u" modifier returns false for invalid UTF-8
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars !== false) {
                return \implode('', \array_reverse($chars));
            }

            // slow path, only reached if the split above failed
            $i = (int) \mb_strlen($str);
            while ($i--) {
                $reversedTmp = \mb_substr($str, $i, 1);
                if ($reversedTmp !== false) {
                    $reversed .= $reversedTmp;
                }
            }
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk from the last character to the first one
            $i = (int) self::strlen($str, $encoding);
            while ($i--) {
                $reversedTmp = self::substr($str, $i, 1, $encoding);
                if ($reversedTmp !== false) {
                    $reversed .= $reversedTmp;
                }
            }
        }

        return $reversed;
    }
9149
9150
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return the part of haystack from its beginning up to the
     *                              last occurrence of needle.<br>
     *                              <b>false</b>: return the part of haystack from the last occurrence of
     *                              needle up to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     *                      (also false if haystack or needle is an empty string)
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: only the first character of the needle is searched
        //

        $firstNeedleChar = self::substr($needle, 0, 1, $encoding);
        if ($firstNeedleChar === false) {
            return false;
        }
        $needle = (string) $firstNeedleChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        if (!$before_needle) {
            return self::substr($haystack, $lastPos, null, $encoding);
        }

        return self::substr($haystack, 0, $lastPos, $encoding);
    }
9225
9226
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for. A non-negative integer is converted via "UTF8::chr()".</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }

        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        $intlIsUsable = $encoding === 'UTF-8'
                        &&
                        $offset >= 0
                        &&
                        self::$SUPPORT['intl'] === true;
        if ($intlIsUsable) {
            $intlResult = \grapheme_strripos($haystack, $needle, $offset);
            if ($intlResult !== false) {
                return $intlResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitive
        //

        $haystackFolded = self::strtocasefold($haystack, true, false, $encoding);
        $needleFolded = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($haystackFolded, $needleFolded, $offset, $encoding, $cleanUtf8);
    }
9336
9337
    /**
     * Finds position of last occurrence of a string within another, case insensitive (byte based).
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found
     *                   (also false if haystack or needle is an empty string)
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with the 8-bit charset "CP850"
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
9368
9369
    /**
     * Find position of last occurrence of a string in a string.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // $haystack was already checked above, so only the needle is left to check
        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the part that has to be searched. The requested
        // $encoding is passed on (like in "UTF8::strpos()"), so that the offset is
        // applied in characters of that encoding and not of the default one.
        $haystackTmp = null;
        if ($offset > 0) {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position of the last match in the (maybe shortened) haystack
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // convert the byte position into a character position of the given encoding
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9506
9507
    /**
     * Find position of last occurrence of a string in a string (byte based).
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack / needle is an empty string.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with the 8-bit charset "CP850"
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
9537
9538
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>If given, only the part of the string from this position on is checked.</p>
     * @param int    $length   [optional] <p>If given, only this many characters of the string are checked.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int the length of the matching initial segment, 0 if nothing matches
     *                   or if the (sliced) string or the mask is empty
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested slice before matching
        $sliceRequested = $length !== null || $offset;
        if ($sliceRequested) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the leading run of characters that are part of the mask
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9581
9582
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also false if haystack or needle is an empty string)
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        $intlIsUsable = $encoding === 'UTF-8'
                        &&
                        self::$SUPPORT['intl'] === true;
        if ($intlIsUsable) {
            $intlResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($intlResult !== false) {
                return $intlResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture everything in front of the first needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $partBeforeNeedle = $match[1];
        if ($before_needle) {
            return $partBeforeNeedle;
        }

        // skip the part in front of the needle and return the rest
        return self::substr($haystack, (int) self::strlen($partBeforeNeedle));
    }
9690
9691
    /**
     * Finds first occurrence of a string within another (byte based).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end,
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found
     *                      (also false if haystack or needle is an empty string)
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with the 8-bit charset "CP850"
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
9726
9727
    /**
9728
     * Unicode transformation for case-less matching.
9729
     *
9730
     * @see http://unicode.org/reports/tr21/tr21-5.html
9731
     *
9732
     * @param string      $str       <p>The input string.</p>
9733
     * @param bool        $full      [optional] <p>
9734
     *                               <b>true</b>, replace full case folding chars (default)<br>
9735
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
9736
     *                               </p>
9737
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9738
     * @param string      $encoding  [optional] <p>Set the charset.</p>
9739
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
9740
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
9741
     *                               is for some languages better ...</p>
9742
     *
9743
     * @return string
9744
     */
9745 32
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before any case handling
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        $toLower = $lower === true;

        // a language or a non default charset needs the full helper methods
        if ($lang !== null || $encoding !== 'UTF-8') {
            return $toLower
                ? self::strtolower($str, $encoding, $cleanUtf8, $lang)
                : self::strtoupper($str, $encoding, $cleanUtf8, $lang);
        }

        // plain UTF-8 without a language: use "mb_" directly
        return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
    }
9779
9780
    /**
9781
     * Make a string lowercase.
9782
     *
9783
     * @see http://php.net/manual/en/function.mb-strtolower.php
9784
     *
9785
     * @param string      $str                   <p>The string being lowercased.</p>
9786
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
9787
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
9788
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
9789
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
9790
     *
9791
     * @return string
9792
     *                <p>String with all alphabetic characters converted to lowercase.</p>
9793
     */
9794 73
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the input is not type-hinted, so force a string first
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the conversion
            $str = self::clean($str);
        }

        if ($tryToKeepStringLength === true) {
            // hack for old php version or for the polyfill ...
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: default charset and no language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language specific lowercase via the intl transliterator
        $transliteratorId = $lang . '-Lower';
        $isKnownId = \in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true);
        if (!$isKnownId) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
9845
9846
    /**
9847
     * Make a string uppercase.
9848
     *
9849
     * @see http://php.net/manual/en/function.mb-strtoupper.php
9850
     *
9851
     * @param string      $str                   <p>The string being uppercased.</p>
9852
     * @param string      $encoding              [optional] <p>Set the charset.</p>
9853
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
9854
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
9855
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
9856
     *
9857
     * @return string
9858
     *                <p>String with all alphabetic characters converted to uppercase.</p>
9859
     */
9860 17
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init: the input is not type-hinted, so force a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: default charset and no language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, only the requested language id is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // fall back to the generic transliterator id
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            // the warning must name this method, not strtolower()
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
9911
9912
    /**
9913
     * Translate characters or replace sub-strings.
9914
     *
9915
     * @see  http://php.net/manual/en/function.strtr.php
9916
     *
9917
     * @param string          $str  <p>The string being translated.</p>
9918
     * @param string|string[] $from <p>The string replacing from.</p>
9919
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
9920
     *
9921
     * @return string
9922
     *                This function returns a copy of str, translating all occurrences of each character in from to the
9923
     *                corresponding character in to
9924
     */
9925 2
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical "from" and "to" cannot change anything
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // split both sides so they can be paired element by element
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // cut the longer side, so both lists have the same number of elements
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the result in its own variable, so the error message below
            // can still report the real input instead of the "false" result
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            $from = $pairs;
        }

        // a plain string without a replacement: remove every occurrence of it
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        // array of "from => to" pairs
        return \strtr($str, $from);
    }
9959
9960
    /**
9961
     * Return the width of a string.
9962
     *
9963
     * @param string $str       <p>The input string.</p>
9964
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9965
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9966
     *
9967
     * @return int
9968
     */
9969 2
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        // the two well known charset names are used as they are
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8 ? \mb_strwidth($str) : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if (!$isUtf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the chars matched by the pattern and count them: each of them is two columns wide
        $wideCharCount = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCharCount);

        return ($wideCharCount * 2) + (int) self::strlen($remaining, 'UTF-8');
    }
10010
10011
    /**
10012
     * Get part of a string.
10013
     *
10014
     * @see http://php.net/manual/en/function.mb-substr.php
10015
     *
10016
     * @param string $str       <p>The string being checked.</p>
10017
     * @param int    $offset    <p>The first position used in str.</p>
10018
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
10019
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10020
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10021
     *
10022
     * @return false|string
10023
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10024
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10025
     *                      characters long, <b>FALSE</b> will be returned.
10026
     */
10027 172
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // use mbstring directly with the given charset: calling self::substr()
            // again with the same arguments would re-enter this branch forever
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            // "null" is not a valid length for "substr()" on every supported
            // php version, so read up to the end of the string instead
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ("$length === 0" was already handled at the top, so only "null" is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        } else {
            $length = (int) $length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10167
10168
    /**
10169
     * Binary safe comparison of two strings from an offset, up to length characters.
10170
     *
10171
     * @param string   $str1               <p>The main string being compared.</p>
10172
     * @param string   $str2               <p>The secondary string being compared.</p>
10173
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
10174
     *                                     counting from the end of the string.</p>
10175
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
10176
     *                                     of the length of the str compared to the length of main_str less the
10177
     *                                     offset.</p>
10178
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
10179
     *                                     insensitive.</p>
10180
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
10181
     *
10182
     * @return int
10183
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
10184
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
10185
     *             <strong>0</strong> if they are equal
10186
     */
10187 2
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // only cut the strings if an offset and / or a length was requested
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // compare only as many chars of $str2 as are left in $str1
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // measure $str1 in the same charset that is used to cut $str2,
                // otherwise the char count is taken with the default charset
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10221
10222
    /**
10223
     * Count the number of substring occurrences.
10224
     *
10225
     * @see  http://php.net/manual/en/function.substr-count.php
10226
     *
10227
     * @param string $haystack  <p>The string to search in.</p>
10228
     * @param string $needle    <p>The substring to search for.</p>
10229
     * @param int    $offset    [optional] <p>The offset where to start counting.</p>
10230
     * @param int    $length    [optional] <p>
10231
     *                          The maximum length after the specified offset to search for the
10232
     *                          substring. It outputs a warning if the offset plus the length is
10233
     *                          greater than the haystack length.
10234
     *                          </p>
10235
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10236
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10237
     *
10238
     * @return false|int this functions returns an integer or false if there isn't a string
10239
     */
10240 5
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // without a haystack or a needle there is nothing to count
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an explicit zero length window cannot contain anything
        if ($length === 0) {
            return 0;
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars from both strings before counting
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $isUtf8 = $encoding === 'UTF-8';

        // reduce the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }

                $length = (int) $haystackLength;
            }

            $haystack = $isUtf8
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // no mbstring: count the matches of the quoted needle via regex
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10303
10304
    /**
10305
     * Count the number of substring occurrences.
10306
     *
10307
     * @param string $haystack <p>
10308
     *                         The string being checked.
10309
     *                         </p>
10310
     * @param string $needle   <p>
10311
     *                         The string being found.
10312
     *                         </p>
10313
     * @param int    $offset   [optional] <p>
10314
     *                         The offset where to start counting
10315
     *                         </p>
10316
     * @param int    $length   [optional] <p>
10317
     *                         The maximum length after the specified offset to search for the
10318
     *                         substring. It outputs a warning if the offset plus the length is
10319
     *                         greater than the haystack length.
10320
     *                         </p>
10321
     *
10322
     * @return false|int the number of times the
10323
     *                   needle substring occurs in the
10324
     *                   haystack string
10325
     */
10326
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // without "mbstring.func_overload" the native function can be used directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // from here on overload is active: cut the window first ...
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }

                $length = (int) $haystackLength;
            }

            $windowIsInvalid = $length !== 0
                               && $offset !== 0
                               && ($length + $offset) <= 0
                               && Bootup::is_php('7.1') === false; // output from "substr_count()" have changed in PHP 7.1
            if ($windowIsInvalid) {
                return false;
            }

            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // ... "mb_" is available if overload is used, so use it
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10381
10382
    /**
10383
     * Returns the number of occurrences of $substring in the given string.
10384
     * By default, the comparison is case-sensitive, but can be made insensitive
10385
     * by setting $caseSensitive to false.
10386
     *
10387
     * @param string $str           <p>The input string.</p>
10388
     * @param string $substring     <p>The substring to search for.</p>
10389
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
10390
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10391
     *
10392
     * @return int
10393
     */
10394 15
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        // any other charset: normalize the name and hand it over to "mb_"
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$caseSensitive) {
                // compare the case-folded (uppercase) variants of both strings
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // default charset
        if (!$caseSensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
10428
10429
    /**
10430
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
10431
     *
10432
     * @param string $haystack <p>The string to search in.</p>
10433
     * @param string $needle   <p>The substring to search for.</p>
10434
     *
10435
     * @return string return the sub-string
10436
     */
10437 2
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // no (case insensitive) prefix match: return the input untouched
        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10453
10454
    /**
10455
     * Get part of a string process in bytes.
10456
     *
10457
     * @param string $str    <p>The string being checked.</p>
10458
     * @param int    $offset <p>The first position used in str.</p>
10459
     * @param int    $length [optional] <p>The maximum length of the returned string.</p>
10460
     *
10461
     * @return false|string
10462
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10463
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10464
     *                      characters long, <b>FALSE</b> will be returned.
10465
     */
10466
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: read up to the end of the string, instead of
        // using a fixed 32-bit maximum that would cut off very long strings
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10485
10486
    /**
10487
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
10488
     *
10489
     * @param string $haystack <p>The string to search in.</p>
10490
     * @param string $needle   <p>The substring to search for.</p>
10491
     *
10492
     * @return string return the sub-string
10493
     */
10494 2
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // no (case insensitive) suffix match: return the input untouched
        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10510
10511
    /**
10512
     * Removes a prefix ($needle) from the start of the string ($haystack).
10513
     *
10514
     * @param string $haystack <p>The string to search in.</p>
10515
     * @param string $needle   <p>The substring to search for.</p>
10516
     *
10517
     * @return string return the sub-string
10518
     */
10519 2
    public static function substr_left(string $haystack, string $needle): string
10520
    {
10521 2
        if ($haystack === '') {
10522 2
            return '';
10523
        }
10524
10525 2
        if ($needle === '') {
10526 2
            return $haystack;
10527
        }
10528
10529 2
        if (self::str_starts_with($haystack, $needle) === true) {
10530 2
            $haystack = (string) \mb_substr($haystack, (int) self::strlen($needle));
10531
        }
10532
10533 2
        return $haystack;
10534
    }
10535
10536
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     <br><br>
     *                                     If $str is an array and the offset array is shorter, 0 is used for the
     *                                     remaining strings.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and $str is a string, it defaults to
     *                                     the length of the string; i.e. end the replacing at the end of string.
     *                                     If length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is a string
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // nothing to process; without this guard the padded one-element
            // argument arrays below would produce a bogus one-element result
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);

                // "array_map()" would pad a shorter array with null, which the
                // scalar branch cannot handle as an offset: use 0 instead
                $offset = \array_pad($offset, $num, 0);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                // NOTE(review): for arrays a missing length is mapped to 0 (pure insert),
                // whereas the scalar branch treats null as "up to the end of the string".
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, one invocation per input string; the encoding is
            // passed along so that it is not silently reset to the default
            return \array_map(
                [self::class, 'substr_replace'],
                $str,
                $replacement,
                $offset,
                $length,
                \array_fill(0, $num, $encoding)
            );
        }

        // a replacement array is only meaningful for an array of strings,
        // for a single string only its first element is used
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // normalize the offset into the range 0..$string_length
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // normalize the length: negative values count from the end of the string
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // prefix + replacement + suffix
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters and splice the arrays
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10686
10687
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to strip.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise comparison of the tail of the haystack with the needle
        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if ($endsWithNeedle === false) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10728
10729
    /**
     * Returns a case swapped version of the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Fast path: XOR-ing the three variants byte by byte yields, per byte, the
        // variant the input is *not* in. This is only reliable while all three
        // strings have the same byte length; the string XOR operator truncates to
        // the shortest operand and would shift every following byte otherwise.
        $byteLength = \strlen($str);
        $sameByteLength = \strlen($lower) === $byteLength && \strlen($upper) === $byteLength;
        if ($encoding !== 'UTF-8' || $sameByteLength === true) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // A case mapping changed the byte length (e.g. "ſ" -> "S"): swap the case
        // character by character instead.
        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // not valid UTF-8, so the string cannot be split: keep the byte-wise result
            return (string) ($lower ^ $upper ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $charLower = \mb_strtolower($char);
            $swapped .= $char === $charLower ? \mb_strtoupper($char) : $charLower;
        }

        return $swapped;
    }
10756
10757
    /**
     * Checks whether symfony-polyfills are used.
     *
     * The check is positive if "mb_strlen()" or "iconv()" exists although the
     * corresponding extension ("mbstring" / "iconv") is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
10780
10781
    /**
     * Replaces every tab character in the string with a run of spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength <p>Number of spaces that replace one tab.</p>
     *
     * @return string <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indentation = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indentation, $str);
    }
10799
10800
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without "$tryToKeepStringLength" the conversion is done
     * by "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the slower helper
        $needsSpecialHandling = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsSpecialHandling === true) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
10837
10838
    /**
     * Alias for "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr <p>Passed to "UTF8::to_ascii()" as its "$unknown" argument.</p>
     * @param bool   $strict    <p>Passed to "UTF8::to_ascii()" as its "$strict" argument.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
10855
10856
    /**
     * Alias for "UTF8::to_iso8859()": the argument is passed through unchanged.
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
10871
10872
    /**
     * Alias for "UTF8::to_latin1()": the argument is passed through unchanged.
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
10887
10888
    /**
     * Alias for "UTF8::to_utf8()": the argument is passed through unchanged.
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
10903
10904
    /**
     * Convert a string into ASCII.
     *
     * Pure ASCII input is returned as-is. Everything else is cleaned via "UTF8::clean()",
     * optionally transliterated via PHP-Intl and finally mapped character by character
     * through per-"bank" lookup tables (bank = code point >> 8). Characters without a
     * mapping are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // cache of the already loaded lookup tables, indexed by bank
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        // lazy-load the table that is used below to turn a byte into its numeric value
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0] ?? [];
        foreach ($chars as &$c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // The code point has to be determined for every character on its own:
            // a value left over from the previous character must never be reused.
            $ord = null;

            // Decode the sequence according to its lead byte; only the trailing
            // bytes that belong to the detected sequence length are read.
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64
                       + (self::$ORD[$c[1]] - 128);
            } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
                $ord = ($ordC0 - 224) * 4096
                       + (self::$ORD[$c[1]] - 128) * 64
                       + (self::$ORD[$c[2]] - 128);
            } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
                $ord = ($ordC0 - 240) * 262144
                       + (self::$ORD[$c[1]] - 128) * 4096
                       + (self::$ORD[$c[2]] - 128) * 64
                       + (self::$ORD[$c[3]] - 128);
            } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
                $ord = ($ordC0 - 248) * 16777216
                       + (self::$ORD[$c[1]] - 128) * 262144
                       + (self::$ORD[$c[2]] - 128) * 4096
                       + (self::$ORD[$c[3]] - 128) * 64
                       + (self::$ORD[$c[4]] - 128);
            } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824
                       + (self::$ORD[$c[1]] - 128) * 16777216
                       + (self::$ORD[$c[2]] - 128) * 262144
                       + (self::$ORD[$c[3]] - 128) * 4096
                       + (self::$ORD[$c[4]] - 128) * 64
                       + (self::$ORD[$c[5]] - 128);
            }

            // no valid lead byte (128-191, 254, 255): the character cannot be mapped
            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // load the lookup table of this bank once and remember missing tables as []
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            // position of the character inside its bank
            $newchar = $ord & 255;

            $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }
        // break the reference to the last element
        unset($c);

        return \implode('', $chars);
    }
11064
11065
    /**
     * Converts a value into a boolean, based on its string representation.
     *
     * "true", "1", "on" and "yes" are true; "false", "0", "off", "no" and the empty
     * string are false (compared case-insensitively). Other numeric strings are true
     * if they are greater than zero. Everything else is cast to bool after "trim()".
     *
     * Info: http://php.net/manual/en/filter.filters.validate.php
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        $normalized = \strtolower($value);

        if (\in_array($normalized, ['true', '1', 'on', 'yes'], true) === true) {
            return true;
        }

        if (\in_array($normalized, ['false', '0', 'off', 'no'], true) === true) {
            return false;
        }

        if (\is_numeric($value) === true) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
11106
11107
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Every character except ASCII letters, digits, ".", "-", whitespace and the
     * fallback character is removed; whitespace is replaced by the fallback character,
     * repeated fallback characters are collapsed and finally trimmed from both ends.
     *
     * @param string $string
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char     Replacement for whitespace; may be empty, in which case whitespace is removed.
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                            // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty fallback would yield the invalid pattern "/[]+/", which makes
        // "preg_replace()" fail and the whole result collapse to an empty string.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/';  // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11142
11143
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), keys are kept.
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11167
11168
    /**
     * Alias for "UTF8::to_iso8859()": the argument is passed through unchanged.
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11181
11182
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays are converted element by element (recursively), keys are kept
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $byteCount = \strlen($str);
        $result = '';
        $pos = 0;

        while ($pos < $byteCount) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                // a stray continuation byte (10xxxxxx) needs conversion,
                // every other byte below 0xC0 is kept as it is
                $result .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert_helper($lead) : $lead;
                ++$pos;

                continue;
            }

            // number of continuation bytes the lead byte announces,
            // 0 = doesn't look like UTF8, but should be converted
            if ($lead <= "\xDF") {
                $expected = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $expected = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $expected = 3; // looks like 4 bytes UTF8
            } else {
                $expected = 0;
            }

            // all announced bytes must exist and must be continuation bytes
            $sequence = $lead;
            $isSequence = $expected > 0 && ($pos + $expected) < $byteCount;
            for ($k = 1; $isSequence && $k <= $expected; ++$k) {
                $next = $str[$pos + $k];
                $isSequence = $next >= "\x80" && $next <= "\xBF";
                $sequence .= $next;
            }

            if ($isSequence) {
                // yeah, almost sure it's UTF8 already
                $result .= $sequence;
                $pos += $expected + 1;
            } else {
                // not valid UTF8 - convert the lead byte only
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences
        $result = \preg_replace_callback(
            '/\\\\u([0-9a-f]{4})/i',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match): string {
                return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
            },
            $result
        );

        if ($result === null) {
            return '';
        }

        // decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
11289
11290
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly instead of relying on truthiness: the string "0" is
        // falsy in PHP, but it is a perfectly valid character to strip.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\s]+|[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11323
11324
    /**
     * Uppercases the first character of a string and leaves the rest untouched.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // the plain "mb_strtoupper()" is enough when no language-specific
        // or length-preserving handling was requested
        $nativeUppercase = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);

            if ($nativeUppercase === true) {
                return \mb_strtoupper($head) . $tail;
            }

            return self::strtoupper($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($nativeUppercase === true) {
            $head = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );

            return $head . $tail;
        }

        $head = self::strtoupper(
            (string) self::substr($str, 0, 1, $encoding),
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );

        return $head . $tail;
    }
11393
11394
    /**
     * Alias of "UTF8::ucfirst()": forwards the three arguments unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
11409
11410
    /**
     * Uppercase the first character of every word in the string.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that must be left unchanged.</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // Compare with '' explicitly: "!$str" is also true for the string "0",
        // which must be returned unchanged instead of being swallowed.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE does not only uppercase the first letter, it also lowercases all other letters

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // The native function is only usable when neither a charlist nor any exception
        // text was given (strict comparison, so a charlist of "0" is not mistaken for empty).
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // iterate by key instead of by reference, so no dangling reference is left behind
        foreach ($words as $index => $word) {
            // falsy entries ('' or '0') contain nothing that could be uppercased
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::ucfirst($word, $encoding);
            }
        }

        return \implode('', $words);
    }
11471
11472
    /**
     * Decode url-encoded text and html entities and fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: decode as often as possible (until the string no longer
     *                             changes); false: run exactly one decoding pass.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // non-standard "%uXXXX" escapes are turned into hex html entities,
        // so that the entity decoder below can resolve them
        $pattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always decode at least once; previously nothing at all was decoded
        // when $multi_decode was false. With $multi_decode the pass is repeated
        // until the result is stable.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
11524
11525
    /**
     * Return an array that maps "urlencoded" win1252 sequences ('%20' ... '%FF') to UTF-8 strings.
     *
     * NOTE(review): '%80' maps to a backtick here although byte 0x80 is the euro sign in
     * Windows-1252 - confirm whether this is intended before relying on it.
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[]
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        $table = [];

        // '%20' ... '%7E': printable ASCII decodes to the character itself
        for ($code = 0x20; $code <= 0x7E; ++$code) {
            $table['%' . \strtoupper(\dechex($code))] = \chr($code);
        }

        // '%7F' ... '%A0': entries that follow no simple rule
        $table += [
            '%7F' => '',
            '%80' => '`',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
        ];

        // '%A1' ... '%FF': the byte value equals the code point, written as a
        // two-byte UTF-8 sequence; '%AD' is the only entry that maps to ''
        for ($code = 0xA1; $code <= 0xFF; ++$code) {
            $table['%' . \strtoupper(\dechex($code))] = $code === 0xAD
                ? ''
                : \chr(0xC0 | ($code >> 6)) . \chr(0x80 | ($code & 0x3F));
        }

        return $table;
    }
11761
11762
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Characters that cannot be represented in a single byte are replaced by "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>true: return the (pre-processed) input instead of the decoded
     *                              result when decoding did not reduce the character count.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        static $UTF8_TO_WIN1252_KEYS_CACHE = null;
        static $UTF8_TO_WIN1252_VALUES_CACHE = null;

        if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

        // saved for the comparison at the end
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';

        // The string is rewritten in place: $i is the read position (bytes), $j the
        // write position. $j never overtakes $i, so unread bytes are never clobbered.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // lead byte of a 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated sequence at the end of the input: there is no
                        // continuation byte to read, so emit the placeholder instead
                        // of accessing an uninitialized string offset
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // lead byte of a 4-byte sequence: skip one byte more than for 3 bytes
                    ++$i;

                // no break

                case "\xE0":
                    // 3- and 4-byte sequences never fit into one byte
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes hold decoded output
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
11845
11846
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * After the native conversion, the sequences listed as keys in the "win1252_to_utf8"
     * data table are replaced by their mapped values.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill may return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        // without a "\xC2" byte the replacement step below is skipped
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        static $searchCache = null;
        static $replaceCache = null;

        if ($searchCache === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
            $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($searchCache, $replaceCache, $encoded);
    }
11886
11887
    /**
     * Fix utf8-win1252 chars; plain forwarder to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
11900
11901
    /**
     * Returns the class-level table of utf8 whitespace characters.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E. [email protected]
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
11916
11917
    /**
     * Limit the number of words in a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
     * @param string $strAddOn <p>Appended in place of the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace, then 1..$limit runs of "word + trailing whitespace"
        $regex = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        $found = [];
        \preg_match($regex, $str, $found);

        // nothing matched: hand the input back untouched
        if (!isset($found[0])) {
            return $str;
        }

        $head = $found[0];

        // the match already covers the whole string: nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $strAddOn;
    }
11944
11945
    /**
     * Wraps a string to a given number of characters.
     *
     * The input is first turned into a single-byte "mask" (one symbol per element returned by
     * "self::str_split()"), the mask is wrapped with the native function and the break
     * positions found in the mask are then applied to the real characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if (!\is_array($segments)) {
            return '';
        }

        // $mask: '#' for an existing break, ' ' for a space, '?' for anything else
        // $symbols: the real text pieces, index-aligned with the mask
        $mask = '';
        $symbols = [];

        foreach ($segments as $index => $segment) {
            if ($index) {
                $symbols[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $symbols[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $cursor = 0;    // next unread index in $symbols
        $maskPos = -1;  // last handled position in $mask
        $breakPos = -1; // position of the current '#' in $mask

        while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
            // copy everything in front of this break
            for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
                $wrapped .= $symbols[$cursor];
                unset($symbols[$cursor]);
                ++$cursor;
            }

            // the break replaces an original break or a space, so drop that symbol
            if ($break === $symbols[$cursor] || $symbols[$cursor] === ' ') {
                unset($symbols[$cursor]);
                ++$cursor;
            }

            $wrapped .= $break;
        }

        // whatever is left follows the last break
        return $wrapped . \implode('', $symbols);
    }
12014
12015
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split at "\r\n", "\r" and "\n"; every line is wrapped on its own
     * and "\n" is appended after each line (including the last one).
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width used for every single line.</p>
     *
     * @return string
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            // Use the wrapper of this class: the native "wordwrap()" measures the width
            // in bytes, so lines with multi-byte characters were wrapped too early.
            $wrapped .= self::wordwrap($line, $limit);
            $wrapped .= "\n";
        }

        return $wrapped;
    }
12039
12040
    /**
     * Returns the class-level array of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12049
12050
    /**
     * Replaces characters according to the case-fold tables.
     *
     * The common table is always applied; the "caseFolding_full" data table is
     * loaded once and applied in addition when $fullCaseFold is set.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = $useLower === true;

        // pick the direction once: search list -> replacement list
        if ($toLower) {
            $search = self::$COMMON_CASE_FOLD['upper'];
            $replace = self::$COMMON_CASE_FOLD['lower'];
        } else {
            $search = self::$COMMON_CASE_FOLD['lower'];
            $replace = self::$COMMON_CASE_FOLD['upper'];
        }

        $str = \str_replace($search, $replace, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 for lowercase, the other way round otherwise
        if ($toLower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12091
12092
    /**
     * Includes "/data/<file>.php" (relative to this directory) and returns its result.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return mixed
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12106
12107
    /**
     * Includes "/data/<file>.php" (relative to this directory) if that file exists.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed the result of the include, or false when the file does not exist
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12125
12126
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * INFO: the INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        // without the constant there is nothing to test the bit against
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $funcOverload = (int) @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return ($funcOverload & \MB_OVERLOAD_STRING) !== 0;
    }
12143
12144
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param array $strings           <p>The strings to filter.</p>
     * @param bool  $removeEmptyValues <p>true: drop values that are empty after "trim()".</p>
     * @param int   $removeShortValues <p>Drop values whose length is less than or equal to this
     *                                 number; null disables the length filter.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = \array_filter(
            $strings,
            static function ($value) use ($removeEmptyValues, $removeShortValues): bool {
                // the length filter is checked first, then the empty filter
                if ($removeShortValues !== null && \mb_strlen($value) <= $removeShortValues) {
                    return false;
                }

                return !($removeEmptyValues === true && \trim($value) === '');
            }
        );

        // array_filter() keeps the old keys, the result has to be a plain list
        return \array_values($kept);
    }
12178
12179
    /**
     * Builds a regex fragment that matches any of the characters in $s.
     *
     * Short characters are collected in one bracket expression; elements returned by
     * "self::str_split()" that are longer than one character are added as alternatives
     * of a non-capturing group. Results are cached per "$s . $class".
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class [optional] <p>Initial content of the bracket expression.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASSS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASSS_CACHE[$cacheKey])) {
            return $RX_CLASSS_CACHE[$cacheKey];
        }

        // index 0 holds the bracket expression content, further entries are alternatives
        $classes = [$class];

        // Iterate by value with a loop variable of its own: the result of a function call
        // cannot serve as a reference target, and the parameter $s must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen goes to the front, where it cannot form a range
                $classes[0] = '-' . $classes[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape regex meta characters
                $classes[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single character of three or more bytes
                $classes[0] .= $char;
            } else {
                $classes[] = $char;
            }
        }

        if ($classes[0]) {
            $classes[0] = '[' . $classes[0] . ']';
        }

        if (\count($classes) === 1) {
            $return = $classes[0];
        } else {
            $return = '(?:' . \implode('|', $classes) . ')';
        }

        $RX_CLASSS_CACHE[$cacheKey] = $return;

        return $return;
    }
12227
12228
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every part via self::ucfirst(),
     * except for parts that
     * - are exactly one of the known lowercase particles ("van", "von", "de", ...),
     * - start with one of the known prefixes ("mc", "mac", "d'", ...), or
     * - start with one of the particles while the delimiter is "-".
     * The parts are joined with $delimiter again afterwards.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  <p>The encoding that is passed to self::strpos().</p>
     *
     * @return string <p>The processed names, or an empty string if $delimiter is empty.</p>
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // explode() rejects an empty delimiter (false + warning before PHP 8, ValueError since PHP 8),
        // so handle that case up-front and keep the empty-string result.
        if ($delimiter === '') {
            return '';
        }

        $namesArray = \explode($delimiter, $names);

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that protect a part from being capitalized; for hyphenated names
        // the particles count as beginnings as well.
        $beginnings = $specialCases['prefixes'];
        if ($delimiter === '-') {
            $beginnings = \array_merge($specialCases['names'], $beginnings);
        }

        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            foreach ($beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // leave this part untouched and go on with the next name part
                    continue 2;
                }
            }

            $namesArray[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $namesArray);
    }
12318
12319
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and every run of
     * non-spacing marks (\p{Mn}) is removed afterwards, e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>The folded string, an empty string if the input can not be normalized,
     *                     or null if preg_replace() fails.</p>
     */
    private static function strtonatfold(string $str)
    {
        $normalized = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure (e.g. invalid UTF-8) with false, which must not be
        // handed to preg_replace(): with strict types that is a TypeError on PHP 8.
        if ($normalized === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $normalized);
    }
12330
12331
    /**
     * Convert one input unit to UTF-8 with the help of the lazily loaded lookup tables.
     *
     * The ordinal of $input is read from self::$ORD. If self::$WIN1252_TO_UTF8 has an entry
     * for that ordinal, the entry is returned; otherwise a two-byte sequence is assembled
     * with byte-wise string operations.
     *
     * NOTE(review): $input is presumably a single byte (or its int form) that exists as a key
     * in the "ord" table - confirm against the callers.
     *
     * @param int|string $input
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        // load the lookup tables on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Truncate explicitly: "/" yields a float for most ordinals and using a fractional
            // float as array offset is deprecated since PHP 8.1.
            $highBits = (int) ($ordC1 / 64);

            // The bitwise operators below are intended: on two strings they work byte by byte.
            $cc1 = self::$CHR[$highBits] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12364
}
12365