Passed
Push — master ( a67eb4...74eb37 )
by Lars
15:23
created

UTF8::is_binary()   C

Complexity

Conditions 13
Paths 26

Size

Total Lines 49
Code Lines 27

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 23
CRAP Score 13.2597

Importance

Changes 0
Metric Value
cc 13
eloc 27
nc 26
nop 2
dl 0
loc 49
ccs 23
cts 26
cp 0.8846
crap 13.2597
rs 6.6166
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
14
     * Bom => Byte-Length
15
     *
16
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
17
     *
18
     * @var array
19
     */
20
    private static $BOM = [
        // Each key is a BOM byte sequence, each value is its length in bytes.
        // The "as WINDOWS-1252" entries are the same BOM after it was wrongly
        // read as Windows-1252 and re-encoded as UTF-8, hence the longer lengths.
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
        'ï»¿'              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two leading characters of the next key are presumably
        // NUL bytes (0x00 0x00), not spaces - verify against the raw file.
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
        // NOTE(review): same as above, the two trailing characters are presumably NUL bytes.
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $UTF8_MSWORD;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $BROKEN_UTF8_FIX;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $WIN1252_TO_UTF8;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $CHR;
219
220
    /**
221
     * __construct()
222
     */
223 32
    public function __construct()
    {
        // Creating an instance triggers the environment detection. The call is
        // cheap when repeated: checkForSupport() fills self::$SUPPORT only once.
        self::checkForSupport();
    }
227
228
    /**
229
     * Return the character at the specified position: $str[1] like functionality.
230
     *
231
     * @param string $str <p>A UTF-8 string.</p>
232
     * @param int    $pos <p>The position of character to return.</p>
233
     *
234
     * @return string single multi-byte character
235
     */
236 3
    public static function access(string $str, int $pos): string
    {
        // Neither an empty string nor a negative position can yield a character.
        if ($str === '' || $pos < 0) {
            return '';
        }

        // Exactly one character, starting at the requested position.
        $character = self::substr($str, $pos, 1);

        return (string) $character;
    }
248
249
    /**
250
     * Prepends UTF-8 BOM character to the string and returns the whole string.
251
     *
252
     * INFO: If BOM already existed there, the Input string is returned.
253
     *
254
     * @param string $str <p>The input string.</p>
255
     *
256
     * @return string the output string that contains BOM
257
     */
258 2
    public static function add_bom_to_string(string $str): string
    {
        // Only an explicit "false" from the BOM check leads to a prepended BOM;
        // in every other case the input is handed back unchanged.
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
266
267
    /**
268
     * Changes all keys in an array.
269
     *
270
     * @param array $array <p>The array to work on</p>
271
     * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
272
     *                     or <strong>CASE_LOWER</strong> (default)</p>
273
     *
274
     * @return array the input array with its keys lower- or uppercased
275
     */
276 2
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER): array
    {
        // Every value other than CASE_UPPER (including invalid ones) means CASE_LOWER.
        $toUpper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the values are only copied, so a by-reference loop
        // variable is unnecessary and would stay bound after the loop.
        foreach ($array as $key => $value) {
            // Array keys can be integers. Cast them explicitly so the string
            // helpers receive a string even with strict_types enabled. PHP turns
            // numeric string keys back into integer keys on assignment.
            $key = (string) $key;

            $key = $toUpper
                ? self::strtoupper($key)
                : self::strtolower($key);

            // If two keys collapse to the same converted key, the later one wins.
            $return[$key] = $value;
        }

        return $return;
    }
297
298
    /**
299
     * Returns the substring between $start and $end, if found, or an empty
300
     * string. An optional offset may be supplied from which to begin the
301
     * search for the start string.
302
     *
303
     * @param string $str
304
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
305
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
306
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
307
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
308
     *
309
     * @return string
310
     */
311 16
    public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
    {
        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            // start delimiter not found
            return '';
        }

        // The wanted substring begins directly behind the start delimiter.
        $contentPos = $startPos + (int) self::strlen($start, $encoding);

        $endPos = self::strpos($str, $end, $contentPos, $encoding);
        if ($endPos === false) {
            // no end delimiter behind the start delimiter
            return '';
        }

        if ($endPos === $contentPos) {
            // the delimiters are adjacent, so there is nothing between them
            return '';
        }

        $contentLength = $endPos - $contentPos;

        return (string) self::substr($str, $contentPos, $contentLength, $encoding);
    }
330
331
    /**
332
     * Converts a binary string (a sequence of "0" and "1" digits) into a string.
333
     *
334
     * @param mixed $bin 1|0
335
     *
336
     * @return string
337
     */
338 2
    public static function binary_to_str($bin): string
    {
        // Input without a first offset (e.g. an empty string) has nothing to convert.
        if (!isset($bin[0])) {
            return '';
        }

        // NOTE: base_convert() may lose precision on very large numbers
        // (see the PHP manual), so very long bit strings are not reliable here.
        $hex = \base_convert($bin, 2, 16);
        if ($hex === '0') {
            return '';
        }

        // pack('H*') consumes two hex digits per byte, high nibble first.
        // base_convert() strips leading zeros, so the digit count can be odd;
        // without left-padding every nibble would be shifted by four bits
        // (e.g. "00000001" would become "\x10" instead of "\x01").
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
351
352
    /**
353
     * Returns the UTF-8 Byte Order Mark Character.
354
     *
355
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
356
     *
357
     * @return string UTF-8 Byte Order Mark
358
     */
359 4
    public static function bom(): string
    {
        // The three bytes EF BB BF form the UTF-8 byte order mark.
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
363
364
    /**
365
     * @alias of UTF8::chr_map()
366
     *
367
     * @see   UTF8::chr_map()
368
     *
369
     * @param array|string $callback
370
     * @param string       $str
371
     *
372
     * @return string[]
373
     */
374 2
    public static function callback($callback, string $str): array
    {
        // Pure alias: all work is delegated to chr_map().
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
378
379
    /**
380
     * Returns the character at $index, with indexes starting at 0.
381
     *
382
     * @param string $str
383
     * @param int    $index    <p>Position of the character.</p>
384
     * @param string $encoding [optional] <p>Default is UTF-8</p>
385
     *
386
     * @return string the character at $index
387
     */
388 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Take a one-character slice at $index; the cast guarantees a string
        // even if substr() reports a failure with a non-string value.
        $character = self::substr($str, $index, 1, $encoding);

        return (string) $character;
    }
392
393
    /**
394
     * Returns an array consisting of the characters in the string.
395
     *
396
     * @param string $str <p>The input string.</p>
397
     *
398
     * @return string[] an array of chars
399
     */
400 3
    public static function chars(string $str): array
    {
        // Splitting with a chunk length of 1 yields one array element per character.
        $characters = self::str_split($str, 1);

        return $characters;
    }
404
405
    /**
406
     * This method will auto-detect your server environment for UTF-8 support.
407
     *
408
     * INFO: You don't need to run it manually, it will be triggered if it's needed.
409
     */
410 37
    public static function checkForSupport()
    {
        // Guard: the flag below is set on the first run, so later calls do nothing.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // mbstring -> http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        // iconv -> http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // intl -> http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();
        self::$SUPPORT['intl__transliterator_list_ids'] = [];

        // The transliterator id list is only fetched when intl is loaded
        // and the function really exists; otherwise the empty list stays.
        $canListTransliterators = self::$SUPPORT['intl'] === true
                                  && \function_exists('transliterator_list_ids') === true;
        if ($canListTransliterators) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // IntlChar -> http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // ctype -> http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // finfo -> http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // json -> http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // pcre -> http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    }
453
454
    /**
455
     * Generates a UTF-8 encoded character from the given code point.
456
     *
457
     * INFO: opposite to UTF8::ord()
458
     *
459
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
460
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
461
     *
462
     * @return string|null multi-byte character, returns null on failure or empty input
463
     */
464 17
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // Results are memoized per "code point + encoding" in a function-static cache.
        static $CHAR_CACHE = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "UTF-8" and "CP850" are used as given; everything else is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        $needsMbstring = !$isUtf8
                         && $encoding !== 'ISO-8859-1'
                         && $encoding !== 'WINDOWS-1252';
        if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
            // Only a warning: the conversion is still attempted below.
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // Fast path: use the "simple" chars only until "\x80".
        // The comparison deliberately uses the uncast input value.
        if ($code_point <= 127) {
            // Lazy-load the lookup table on first use.
            // NOTE(review): presumably maps a byte value to its one-byte string - confirm against getData('chr').
            if (self::$CHR === null) {
                $chrData = self::getData('chr');
                if ($chrData) {
                    self::$CHR = (array) $chrData;
                }
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if (!$isUtf8) {
                $chr = self::encode($encoding, $chr);
            }

            $CHAR_CACHE[$cacheKey] = $chr;

            return $chr;
        }

        // Preferred path for everything else: let the intl extension build the character.
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if (!$isUtf8) {
                $chr = self::encode($encoding, $chr);
            }

            $CHAR_CACHE[$cacheKey] = $chr;

            return $chr;
        }

        // Fallback: assemble the UTF-8 byte sequence by hand from the lookup table.
        if (self::$CHR === null) {
            $chrData = self::getData('chr');
            if ($chrData) {
                self::$CHR = (array) $chrData;
            }
        }

        $code_point = (int) $code_point;
        if ($code_point <= 0x7F) {
            // one byte; the table entry is used directly
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } else {
            if ($code_point <= 0x7FF) {
                // two bytes: 110xxxxx 10xxxxxx
                $byteValues = [
                    ($code_point >> 6) + 0xC0,
                    ($code_point & 0x3F) + 0x80,
                ];
            } elseif ($code_point <= 0xFFFF) {
                // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                $byteValues = [
                    ($code_point >> 12) + 0xE0,
                    (($code_point >> 6) & 0x3F) + 0x80,
                    ($code_point & 0x3F) + 0x80,
                ];
            } else {
                // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $byteValues = [
                    ($code_point >> 18) + 0xF0,
                    (($code_point >> 12) & 0x3F) + 0x80,
                    (($code_point >> 6) & 0x3F) + 0x80,
                    ($code_point & 0x3F) + 0x80,
                ];
            }

            $chr = '';
            foreach ($byteValues as $byteValue) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr .= self::$CHR[$byteValue];
            }
        }

        if (!$isUtf8) {
            $chr = self::encode($encoding, $chr);
        }

        $CHAR_CACHE[$cacheKey] = $chr;

        return $chr;
    }
568
569
    /**
570
     * Applies callback to all characters of a string.
571
     *
572
     * @param array|string $callback <p>The callback function.</p>
573
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
574
     *
575
     * @return string[] the outcome of callback
576
     */
577 2
    public static function chr_map($callback, string $str): array
    {
        // Split into single characters and run the callback once per character.
        return \array_map($callback, self::split($str));
    }
583
584
    /**
585
     * Generates an array of byte length of each character of a Unicode string.
586
     *
587
     * 1 byte => U+0000  - U+007F
588
     * 2 byte => U+0080  - U+07FF
589
     * 3 byte => U+0800  - U+FFFF
590
     * 4 byte => U+10000 - U+10FFFF
591
     *
592
     * @param string $str <p>The original unicode string.</p>
593
     *
594
     * @return int[] an array of byte lengths of each character
595
     */
596 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $characters = self::split($str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Common case: the native strlen() is used to measure each character.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \array_map('\strlen', $characters);
        }

        // With mbstring function overloading active, the dedicated byte-length
        // helper is used instead of strlen().
        $byteLength = static function (string $data): int {
            return self::strlen_in_byte($data);
        };

        return \array_map($byteLength, $characters);
    }
619
620
    /**
621
     * Get a decimal code representation of a specific character.
622
     *
623
     * @param string $char <p>The input character.</p>
624
     *
625
     * @return int
626
     */
627 4
    public static function chr_to_decimal(string $char): int
    {
        $code = self::ord($char[0]);

        // 0xxxxxxx: high bit not set, the value is already the result
        if (($code & 0x80) === 0) {
            return $code;
        }

        // The lead byte announces the sequence length; its marker bits are
        // cleared so that only payload bits remain in $code.
        $sequenceLength = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $sequenceLength = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $sequenceLength = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $sequenceLength = 4;
            $code &= ~0xf0;
        }

        // Each following byte (10xxxxxx) adds six payload bits.
        for ($offset = 1; $offset < $sequenceLength; ++$offset) {
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
658
659
    /**
660
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
661
     *
662
     * @param int|string $char <p>The input character</p>
663
     * @param string     $pfix [optional]
664
     *
665
     * @return string The code point encoded as U+xxxx
666
     */
667 2
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // The HTML entity for NUL is handled like an empty character.
        $input = $char === '&#0;' ? '' : (string) $char;

        $codePoint = self::ord($input);

        return self::int_to_hex($codePoint, $pfix);
    }
679
680
    /**
681
     * alias for "UTF8::chr_to_decimal()"
682
     *
683
     * @see UTF8::chr_to_decimal()
684
     *
685
     * @param string $chr
686
     *
687
     * @return int
688
     */
689 2
    public static function chr_to_int(string $chr): int
    {
        // Pure alias: all work is delegated to chr_to_decimal().
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
693
694
    /**
695
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
696
     *
697
     * @param string $body     <p>The original string to be split.</p>
698
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
699
     * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
700
     *
701
     * @return string the chunked string
702
     */
703 4
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        // Cut the text into chunks of at most $chunklen characters ...
        $chunks = self::split($body, $chunklen);

        // ... and glue them together; $end is placed between the chunks only,
        // not after the last one.
        return \implode($end, $chunks);
    }
707
708
    /**
709
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
710
     *
711
     * @param string $str                           <p>The string to be sanitized.</p>
712
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
713
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
714
     *                                              whitespace.</p>
715
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
716
     *                                              e.g.: "…"
717
     *                                              => "..."</p>
718
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
719
     *                                              combination with
720
     *                                              $normalize_whitespace</p>
721
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
722
     *                                              mark e.g.: "�"</p>
723
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
724
     *                                              characters e.g.: "\0"</p>
725
     *
726
     * @return string clean UTF-8 encoded string
727
     */
728 111
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; groups 2 and 3 match
        // single stray bytes. Replacing every match with "$1" keeps the
        // well-formed runs and drops the stray bytes.
        $validSequencesPattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $str = (string) \preg_replace($validSequencesPattern, '$1', $str);

        // The optional steps run in this fixed order.
        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str, '');
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
775
776
    /**
777
     * Clean up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
778
     *
779
     * @param string $str <p>The input string.</p>
780
     *
781
     * @return string
782
     */
783 33
    public static function cleanup($str): string
    {
        // The parameter is untyped, so normalize it to a string first.
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        // fix ISO <-> UTF-8 errors
        $str = self::fix_simple_utf8($str);

        // Flags for clean(): everything is switched on except the MS Word
        // normalization; non-breaking spaces survive the whitespace normalization.
        $removeBom = true;
        $normalizeWhitespace = true;
        $normalizeMsWord = false;
        $keepNonBreakingSpace = true;
        $replaceDiamondQuestionMark = true;
        $removeInvisibleCharacters = true;

        return self::clean(
            $str,
            $removeBom,
            $normalizeWhitespace,
            $normalizeMsWord,
            $keepNonBreakingSpace,
            $replaceDiamondQuestionMark,
            $removeInvisibleCharacters
        );
    }
810
811
    /**
812
     * Accepts a string or an array of strings and returns an array of Unicode code points.
813
     *
814
     * INFO: opposite to UTF8::string()
815
     *
816
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
817
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
818
     *                                 default, code points will be returned as integers.</p>
819
     *
820
     * @return array<int|string>
821
     *                           The array of code points:<br>
822
     *                           array<int> for $u_style === false<br>
823
     *                           array<string> for $u_style === true<br>
824
     */
825 12
    public static function codepoints($arg, bool $u_style = false): array
    {
        // A string is split into characters first; anything else is used as given.
        $chars = \is_string($arg) === true ? self::split($arg) : $arg;

        $toCodePoint = [self::class, 'ord'];
        $codePoints = \array_map($toCodePoint, $chars);

        if (\count($codePoints) === 0) {
            return [];
        }

        // default: plain integer code points
        if (!$u_style) {
            return $codePoints;
        }

        // $u_style: convert every integer with int_to_hex() and its default prefix
        $toHex = [self::class, 'int_to_hex'];

        return \array_map($toHex, $codePoints);
    }
855
856
    /**
857
     * Trims the string and replaces consecutive whitespace characters with a
858
     * single space. This includes tabs and newline characters, as well as
859
     * multibyte whitespace such as the thin space and ideographic space.
860
     *
861
     * @param string $str <p>The input string.</p>
862
     *
863
     * @return string string with a trimmed $str and condensed whitespace
864
     */
865 13
    public static function collapse_whitespace(string $str): string
    {
        // First squeeze every whitespace run into one ordinary space ...
        $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

        // ... then strip what is left at both ends.
        return self::trim($condensed);
    }
871
872
    /**
873
     * Returns count of characters used in a string.
874
     *
875
     * @param string $str       <p>The input string.</p>
876
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
877
     *
878
     * @return int[] an associative array of Character as keys and
879
     *               their count as values
880
     */
881 19
    public static function count_chars(string $str, bool $cleanUtf8 = false): array
    {
        // Split with a chunk length of 1; $cleanUtf8 is forwarded to split() unchanged.
        $characters = self::split($str, 1, $cleanUtf8);

        // Tally how often each distinct chunk occurs.
        return \array_count_values($characters);
    }
885
886
    /**
887
     * Remove css media-queries.
888
     *
889
     * @param string $str
890
     *
891
     * @return string
892
     */
893 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Flags: m = multiline, i = case-insensitive, s = "." also matches newlines,
        // U = quantifiers are ungreedy, so each "@media ... { ... } }" block is matched on its own.
        $stripped = \preg_replace(
            '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU',
            '',
            $str
        );

        // preg_replace() returns null when PCRE fails (e.g. backtrack limit, invalid input).
        // Casting that to string would silently turn the whole stylesheet into '',
        // so hand the input back untouched instead.
        return $stripped ?? $str;
    }
901
902
    /**
903
     * Checks whether ctype is available on the server.
904
     *
905
     * @return bool
906
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
907
     */
908
    public static function ctype_loaded(): bool
    {
        // Ask the runtime whether the "ctype" extension is loaded.
        $hasCtype = \extension_loaded('ctype');

        return $hasCtype;
    }
912
913
    /**
914
     * Converts an int value into a UTF-8 character.
915
     *
916
     * @param mixed $int
917
     *
918
     * @return string
919
     */
920 10
    public static function decimal_to_chr($int): string
    {
        // Build a decimal numeric character reference ("&#<int>;") ...
        $entity = '&#' . $int . ';';
        $flags = \ENT_QUOTES | \ENT_HTML5;

        // ... and let the entity decoder resolve it.
        return self::html_entity_decode($entity, $flags);
    }
924
925
    /**
926
     * Decodes a MIME header field
927
     *
928
     * @param string $str
929
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
930
     *
931
     * @return false|string
932
     *                      A decoded MIME field on success,
933
     *                      or false if an error occurs during the decoding
934
     */
935
    public static function decode_mimeheader($str, $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used verbatim; every other name goes through normalize_encoding().
        $isKnownName = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownName) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['iconv'] !== true) {
            // No iconv: for a non-UTF-8 target, re-encode the input first, then use mbstring.
            $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

            return \mb_decode_mimeheader($input);
        }

        // iconv is flagged as available: decode with it and keep going past malformed parts.
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
955
956
    /**
957
     * Encode a string with a new charset-encoding.
958
     *
959
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
960
     *        so you can also call this function on a UTF-8 string without messing up the string.
961
     *
962
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
963
     * @param string $str                    <p>The input string</p>
964
     * @param bool   $autodetectFromEncoding [optional] <p>Force the new encoding (we try to fix broken / double
965
     *                                       encoding for UTF-8)<br> otherwise we auto-detect the current
966
     *                                       string-encoding</p>
967
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
968
     *                                       An empty string will trigger the autodetect anyway.</p>
969
     *
970
     * @return string
971
     *
972
     * @psalm-suppress InvalidReturnStatement
973
     */
974 28
    public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
    {
        // Without input or without a target encoding there is nothing to convert.
        if ($toEncoding === '' || $str === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are taken verbatim; any other name is run through normalize_encoding().
        if (\in_array($toEncoding, ['UTF-8', 'CP850'], true) === false) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && \in_array($fromEncoding, ['UTF-8', 'CP850'], true) === false) {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // Source and target are both known and identical: no conversion required.
        if ($toEncoding && $fromEncoding && $toEncoding === $fromEncoding) {
            return $str;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // --- Pseudo encodings: a matching target returns immediately, a matching source is
        // --- decoded in place and the source encoding is reset so detection can take over.

        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // Detect the source encoding when asked to, or when none is known at this point.
        $detected = ($autodetectFromEncoding === true || !$fromEncoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $detected, $str, "\n\n");

        if ($detected !== false) {
            // A detection result always wins over the passed-in source encoding.
            $fromEncoding = $detected;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // Still no source encoding, or already in the target encoding.
        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // WINDOWS-1252 / ISO-8859-1 => UTF-8 is delegated to to_utf8().
        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        // WINDOWS-1252 / UTF-8 => ISO-8859-1 is delegated to to_iso8859().
        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // Any other target with mbstring flagged as unavailable only raises a warning here;
        // execution continues with the conversion attempts below.
        if (
            \in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            // Only a truthy result is accepted; anything else falls through to iconv.
            if ($converted) {
                return $converted;
            }
        }

        // Last attempt: iconv. If that fails as well, the input is returned as it is.
        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        return $viaIconv !== false ? $viaIconv : $str;
    }
1112
1113
    /**
1114
     * @param string $str
1115
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
1116
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
1117
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
1118
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
1119
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
1120
     *
1121
     * @return false|string
1122
     *                      An encoded MIME field on success,
1123
     *                      or false if an error occurs during the encoding
1124
     */
1125
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // "UTF-8" and "CP850" are used verbatim; every other charset name is normalized first.
        if (\in_array($fromCharset, ['UTF-8', 'CP850'], true) === false) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (\in_array($toCharset, ['UTF-8', 'CP850'], true) === false) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        // Preferences in the shape iconv_mime_encode() expects.
        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        // The field name is left empty; only the value ($str) is encoded.
        return \iconv_mime_encode('', $str, $preferences);
    }
1153
1154
    /**
1155
     * Create an extract from a sentence; if the search string is found, the extract tries to center it in the output.
1156
     *
1157
     * @param string   $str                    <p>The input string.</p>
1158
     * @param string   $search                 <p>The searched string.</p>
1159
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
1160
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
1161
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
1162
     *
1163
     * @return string
1164
     */
1165 1
    public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Characters stripped from the cut edges of an extract before the replacer is attached.
        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // Default extract length: half of the input length.
        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // Position of the first space or period at/after $offset, or 0 when neither exists.
        // Only integer results are compared: min() over a mix of int and false would return
        // false as soon as ONE of the two characters is missing, which disabled the cut
        // for any text that contains spaces but no period (or the other way round).
        $findBreak = static function (int $offset) use ($str, $encoding): int {
            $hits = [];
            foreach ([' ', '.'] as $boundaryChar) {
                $hit = self::strpos($str, $boundaryChar, $offset, $encoding);
                if (\is_int($hit)) {
                    $hits[] = $hit;
                }
            }

            return $hits === [] ? 0 : \min($hits);
        };

        // No search term: cut from the start of the string.
        // Compared against '' (not empty()) so that the search term "0" is still searched for.
        if ($search === '') {
            if ($length > 0) {
                $stringLength = (int) self::strlen($str, $encoding);
                $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = $findBreak($end);

            if ($pos) {
                $strSub = self::substr($str, 0, $pos, $encoding);
                if ($strSub === false) {
                    return '';
                }

                return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
            }

            return $str;
        }

        // Start of the window that would centre the search term within $length characters.
        $wordPos = (int) self::stripos($str, $search, 0, $encoding);
        $halfSide = (int) ($wordPos - $length / 2 + (int) self::strlen($search, $encoding) / 2);

        // Move the window start back to the last space/period before it.
        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = self::substr($str, 0, $halfSide, $encoding);
            if ($halfText !== false) {
                $pos_start = (int) \max(
                    self::strrpos($halfText, ' ', 0, $encoding),
                    self::strrpos($halfText, '.', 0, $encoding)
                );
            }
        }

        if ($wordPos && $halfSide > 0) {
            // The term sits far enough into the text: text is skipped in front of the extract.
            $offset = $pos_start + $length - 1;
            $realLength = (int) self::strlen($str, $encoding);

            if ($offset > $realLength) {
                $offset = $realLength;
            }

            $pos_end = $findBreak($offset) - $pos_start;

            if ($pos_end <= 0) {
                // No usable break after the window: take everything from $pos_start on.
                // The length is now measured in $encoding, like every other call in this method.
                $strSub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
                } else {
                    $extract = '';
                }
            } else {
                // Text is skipped on both sides of the extract.
                $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            }
        } else {
            // Term at position 0 / not located, or the window starts at the beginning:
            // cut from the start of the string.
            $offset = $length - 1;
            $trueLength = (int) self::strlen($str, $encoding);

            if ($offset > $trueLength) {
                $offset = $trueLength;
            }

            $pos_end = $findBreak($offset);

            if ($pos_end) {
                $strSub = self::substr($str, 0, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1271
1272
    /**
1273
     * Reads entire file into a string.
1274
     *
1275
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1276
     *
1277
     * @see http://php.net/manual/en/function.file-get-contents.php
1278
     *
1279
     * @param string        $filename         <p>
1280
     *                                        Name of the file to read.
1281
     *                                        </p>
1282
     * @param bool          $use_include_path [optional] <p>
1283
     *                                        Prior to PHP 5, this parameter is called
1284
     *                                        use_include_path and is a bool.
1285
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1286
     *                                        to trigger include path
1287
     *                                        search.
1288
     *                                        </p>
1289
     * @param resource|null $context          [optional] <p>
1290
     *                                        A valid context resource created with
1291
     *                                        stream_context_create. If you don't need to use a
1292
     *                                        custom context, you can skip this parameter by &null;.
1293
     *                                        </p>
1294
     * @param int|null      $offset           [optional] <p>
1295
     *                                        The offset where the reading starts.
1296
     *                                        </p>
1297
     * @param int|null      $maxLength        [optional] <p>
1298
     *                                        Maximum length of data read. The default is to read until end
1299
     *                                        of file is reached.
1300
     *                                        </p>
1301
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1302
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1303
     *                                        some files, because they used non default utf-8 chars. Binary files
1304
     *                                        like images or pdf will not be converted.</p>
1305
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1306
     *                                        An empty string will trigger the autodetect anyway.</p>
1307
     *
1308
     * @return false|string the function returns the read data or false on failure
1309
     */
1310 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // init
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);

        // Without a caller-supplied context, build one that carries the timeout as an "http" option.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // The length argument is only passed along when the caller actually provided one.
        $data = $maxLength === null
            ? \file_get_contents($filename, $use_include_path, $context, $offset)
            : \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // Content flagged as binary that is neither UTF-16 nor UTF-32 is returned untouched.
        $isPlainBinary = self::is_binary($data, true) === true
            && self::is_utf16($data, false) === false
            && self::is_utf32($data, false) === false;

        if (!$isPlainBinary) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1365
1366
    /**
1367
     * Checks if a file starts with BOM (Byte Order Mark) character.
1368
     *
1369
     * @param string $file_path <p>Path to a valid file.</p>
1370
     *
1371
     * @throws \RuntimeException if file_get_contents() returned false
1372
     *
1373
     * @return bool
1374
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
1375
     */
1376 2
    public static function file_has_bom(string $file_path): bool
    {
        $raw = \file_get_contents($file_path);

        if ($raw !== false) {
            // The BOM check itself is done on the file's content by string_has_bom().
            return self::string_has_bom($raw);
        }

        // Reading failed: surface this as an exception.
        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1385
1386
    /**
1387
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1388
     *
1389
     * @param mixed  $var
1390
     * @param int    $normalization_form
1391
     * @param string $leading_combining
1392
     *
1393
     * @return mixed
1394
     */
1395 43
    public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
    {
        // Arrays and objects: filter every element / property recursively, in place.
        if (\is_array($var) || \is_object($var)) {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        // Every other non-string value is passed through unchanged.
        if (!\is_string($var)) {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization.
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        /** @noinspection PhpUndefinedClassInspection */
        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Non-empty marker so the isset($n[0]) check below passes for already-normalized input.
            $n = '-';
        } else {
            /** @noinspection PhpUndefinedClassInspection */
            $n = \Normalizer::normalize($var, $normalization_form);

            // A non-empty normalizer result is used directly; otherwise the input is re-encoded to UTF-8.
            $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
            && isset($n[0], $leading_combining[0])
            && \preg_match('/^\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1452
1453
    /**
1454
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1455
     *
1456
     * Gets a specific external variable by name and optionally filters it
1457
     *
1458
     * @see  http://php.net/manual/en/function.filter-input.php
1459
     *
1460
     * @param int    $type          <p>
1461
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1462
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1463
     *                              <b>INPUT_ENV</b>.
1464
     *                              </p>
1465
     * @param string $variable_name <p>
1466
     *                              Name of a variable to get.
1467
     *                              </p>
1468
     * @param int    $filter        [optional] <p>
1469
     *                              The ID of the filter to apply. The
1470
     *                              manual page lists the available filters.
1471
     *                              </p>
1472
     * @param mixed  $options       [optional] <p>
1473
     *                              Associative array of options or bitwise disjunction of flags. If filter
1474
     *                              accepts options, flags can be provided in "flags" field of array.
1475
     *                              </p>
1476
     *
1477
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1478
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1479
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1480
     */
1481
    public static function filter_input(int $type, string $variable_name, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // $options is only forwarded when the caller actually passed a fourth argument,
        // so an omitted $options is not sent to the native function as null.
        $raw = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        // Run the native result through self::filter().
        return self::filter($raw);
    }
1491
1492
    /**
1493
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1494
     *
1495
     * Gets external variables and optionally filters them
1496
     *
1497
     * @see  http://php.net/manual/en/function.filter-input-array.php
1498
     *
1499
     * @param int   $type       <p>
1500
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1501
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1502
     *                          <b>INPUT_ENV</b>.
1503
     *                          </p>
1504
     * @param mixed $definition [optional] <p>
1505
     *                          An array defining the arguments. A valid key is a string
1506
     *                          containing a variable name and a valid value is either a filter type, or an array
1507
     *                          optionally specifying the filter, flags and options. If the value is an
1508
     *                          array, valid keys are filter which specifies the
1509
     *                          filter type,
1510
     *                          flags which specifies any flags that apply to the
1511
     *                          filter, and options which specifies any options that
1512
     *                          apply to the filter. See the example below for a better understanding.
1513
     *                          </p>
1514
     *                          <p>
1515
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1516
     *                          input array are filtered by this filter.
1517
     *                          </p>
1518
     * @param bool  $add_empty  [optional] <p>
1519
     *                          Add missing keys as <b>NULL</b> to the return value.
1520
     *                          </p>
1521
     *
1522
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1523
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1524
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1525
     *               is not set and <b>NULL</b> if the filter fails.
1526
     */
1527
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With only $type given, the native function is called without the optional arguments.
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // Run the native result through self::filter().
        return self::filter($values);
    }
1537
1538
    /**
1539
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1540
     *
1541
     * Filters a variable with a specified filter
1542
     *
1543
     * @see  http://php.net/manual/en/function.filter-var.php
1544
     *
1545
     * @param mixed $variable <p>
1546
     *                        Value to filter.
1547
     *                        </p>
1548
     * @param int   $filter   [optional] <p>
1549
     *                        The ID of the filter to apply. The
1550
     *                        manual page lists the available filters.
1551
     *                        </p>
1552
     * @param mixed $options  [optional] <p>
1553
     *                        Associative array of options or bitwise disjunction of flags. If filter
1554
     *                        accepts options, flags can be provided in "flags" field of array. For
1555
     *                        the "callback" filter, callable type should be passed. The
1556
     *                        callback must accept one argument, the value to be filtered, and return
1557
     *                        the value after filtering/sanitizing it.
1558
     *                        </p>
1559
     *                        <p>
1560
     *                        <code>
1561
     *                        // for filters that accept options, use this format
1562
     *                        $options = array(
1563
     *                        'options' => array(
1564
     *                        'default' => 3, // value to return if the filter fails
1565
     *                        // other options here
1566
     *                        'min_range' => 0
1567
     *                        ),
1568
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1569
     *                        );
1570
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1571
     *                        // for filter that only accept flags, you can pass them directly
1572
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1573
     *                        // for filter that only accept flags, you can also pass as an array
1574
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1575
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1576
     *                        // callback validate filter
1577
     *                        function foo($value)
1578
     *                        {
1579
     *                        // Expected format: Surname, GivenNames
1580
     *                        if (strpos($value, ", ") === false) return false;
1581
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1582
     *                        $empty = (empty($surname) || empty($givennames));
1583
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1584
     *                        if ($empty || $notstrings) {
1585
     *                        return false;
1586
     *                        } else {
1587
     *                        return $value;
1588
     *                        }
1589
     *                        }
1590
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1591
     *                        </code>
1592
     *                        </p>
1593
     *
1594
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1595
     */
1596 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // $options is only forwarded when the caller actually passed a third argument.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        // Run the native result through self::filter().
        return self::filter($filtered);
    }
1606
1607
    /**
1608
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1609
     *
1610
     * Gets multiple variables and optionally filters them
1611
     *
1612
     * @see  http://php.net/manual/en/function.filter-var-array.php
1613
     *
1614
     * @param array $data       <p>
1615
     *                          An array with string keys containing the data to filter.
1616
     *                          </p>
1617
     * @param mixed $definition [optional] <p>
1618
     *                          An array defining the arguments. A valid key is a string
1619
     *                          containing a variable name and a valid value is either a
1620
     *                          filter type, or an
1621
     *                          array optionally specifying the filter, flags and options.
1622
     *                          If the value is an array, valid keys are filter
1623
     *                          which specifies the filter type,
1624
     *                          flags which specifies any flags that apply to the
1625
     *                          filter, and options which specifies any options that
1626
     *                          apply to the filter. See the example below for a better understanding.
1627
     *                          </p>
1628
     *                          <p>
1629
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1630
     *                          input array are filtered by this filter.
1631
     *                          </p>
1632
     * @param bool  $add_empty  [optional] <p>
1633
     *                          Add missing keys as <b>NULL</b> to the return value.
1634
     *                          </p>
1635
     *
1636
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1637
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1638
     *               set
1639
     */
1640 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With only $data given, call the native function without a definition
        // so that its built-in defaults apply.
        if (\func_num_args() >= 2) {
            $filtered = \filter_var_array($data, $definition, $add_empty);
        } else {
            $filtered = \filter_var_array($data);
        }

        // Post-process the native result with self::filter().
        return self::filter($filtered);
    }
1650
1651
    /**
1652
     * Checks whether finfo is available on the server.
1653
     *
1654
     * @return bool
1655
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1656
     */
1657
    public static function finfo_loaded(): bool
    {
        // "finfo" counts as available when a class of that name exists.
        $hasFinfoClass = \class_exists('finfo');

        return $hasFinfoClass;
    }
1661
1662
    /**
1663
     * Returns the first $n characters of the string.
1664
     *
1665
     * @param string $str      <p>The input string.</p>
1666
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1667
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1668
     *
1669
     * @return string
1670
     */
1671 13
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // A non-positive count yields an empty string; otherwise the leading
        // $n characters are taken via self::substr() and cast to string.
        return $n > 0
            ? (string) self::substr($str, 0, $n, $encoding)
            : '';
    }
1679
1680
    /**
1681
     * Check if the number of unicode characters is not more than the specified integer.
1682
     *
1683
     * @param string $str      the original string to be checked
1684
     * @param int    $box_size the size in number of chars to be checked against string
1685
     *
1686
     * @return bool true if string is less than or equal to $box_size, false otherwise
1687
     */
1688 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Character count as reported by self::strlen().
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1692
1693
    /**
1694
     * Try to fix simple broken UTF-8 strings.
1695
     *
1696
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1697
     *
1698
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1699
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1700
     * See: http://en.wikipedia.org/wiki/Windows-1252
1701
     *
1702
     * @param string $str <p>The input string</p>
1703
     *
1704
     * @return string
1705
     */
1706 42
    public static function fix_simple_utf8(string $str): string
    {
        // Search/replace lists are built on first use and kept in static locals.
        static $search = null;
        static $replace = null;

        if ($str === '') {
            return '';
        }

        if ($search === null) {
            // Lazily load the "broken sequence => fixed sequence" map.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $search = \array_keys(self::$BROKEN_UTF8_FIX);
            $replace = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($search, $replace, $str);
    }
1726
1727
    /**
1728
     * Fix a double (or multiple) encoded UTF8 string.
1729
     *
1730
     * @param string|string[] $str you can use a string or an array of strings
1731
     *
1732
     * @return string|string[]
1733
     *                         Will return the fixed input-"array" or
1734
     *                         the fixed input-"string"
1735
     *
1736
     * @psalm-suppress InvalidReturnType
1737
     */
1738 2
    public static function fix_utf8($str)
    {
        // Arrays are processed element by element (recursively); keys are kept.
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Repeat the decode/re-encode round trip until the string stops changing.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($current, true);
            $current = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1769
1770
    /**
1771
     * Get the text direction ('RTL' or 'LTR') of a specific character.
1772
     *
1773
     * @param string $char
1774
     *
1775
     * @return string 'RTL' or 'LTR'
1776
     */
1777 2
    public static function getCharDirection(string $char): string
    {
        // Fallback table, part 1: individual code points classified as right-to-left.
        static $rtlSingles = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        // Fallback table, part 2: inclusive [first, last] code point ranges
        // classified as right-to-left.
        static $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Preferred path: ask IntlChar and map its numeric direction code.
        // Codes it does not map fall through to the table lookup below.
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $direction = \IntlChar::charDirection($char);

            // direction codes taken from the "IntlChar"-Class
            if (\in_array($direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $c = static::chr_to_decimal($char);

        // Everything outside the span covered by the tables is left-to-right.
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // Strict comparison, exactly like the "===" checks this table replaces.
        if (\in_array($c, $rtlSingles, true)) {
            return 'RTL';
        }

        foreach ($rtlRanges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
1887
1888
    /**
1889
     * Check for php-support.
1890
     *
1891
     * @param string|null $key
1892
     *
1893
     * @return mixed
1894
     *               Return the full support-"array", if $key === null<br>
1895
     *               return bool-value, if $key is used and available<br>
1896
     *               otherwise return <strong>null</strong>
1897
     */
1898 26
    public static function getSupportInfo(string $key = null)
    {
        // Make sure the support table has been populated.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // No key: return the whole table.
        // Unknown key (or a key whose value is null): return null.
        return $key === null
            ? self::$SUPPORT
            : (self::$SUPPORT[$key] ?? null);
    }
1914
1915
    /**
1916
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
1917
     *          if you need more supported types, please use e.g. "finfo"
1918
     *
1919
     * @param string $str
1920
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
1921
     *
1922
     * @return array
1923
     *               with this keys: 'ext', 'mime', 'type'
1924
     */
1925 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // Signature table. The key is the decimal value of the first byte
        // directly followed by the decimal value of the second byte
        // (e.g. bytes 37 and 80 give 3780); the value is [ext, mime].
        static $signatures = [
            3780   => ['pdf', 'application/pdf'],
            7790   => ['exe', 'application/octet-stream'],
            7784   => ['midi', 'audio/x-midi'],
            8075   => ['zip', 'application/zip'],
            8297   => ['rar', 'application/rar'],
            255216 => ['jpg', 'image/jpeg'],
            7173   => ['gif', 'image/gif'],
            6677   => ['bmp', 'image/bmp'],
            13780  => ['png', 'image/png'],
        ];

        if ($str === '') {
            return $fallback;
        }

        // Exactly two leading bytes are needed to build the lookup key.
        $head = self::substr_in_byte($str, 0, 2);
        if ($head === false || self::strlen_in_byte($head) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $head);
        $code = (int) ($bytes['chars1'] . $bytes['chars2']);

        // Unknown signature: hand back the caller's fallback untouched.
        if (!isset($signatures[$code])) {
            return $fallback;
        }

        return [
            'ext'  => $signatures[$code][0],
            'mime' => $signatures[$code][1],
            'type' => 'binary',
        ];
    }
2013
2014
    /**
2015
     * @param int    $length        <p>Length of the random string.</p>
2016
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2017
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2018
     *
2019
     * @return string
2020
     */
2021 1
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // An empty character pool cannot produce any output.
        $poolSize = (int) self::strlen($possibleChars, $encoding);
        if ($poolSize === 0) {
            return '';
        }

        $lastIndex = $poolSize - 1;
        $result = '';
        $picked = 0;

        while ($picked < $length) {
            try {
                $index = \random_int(0, $lastIndex);
            } catch (\Exception $e) {
                // random_int() failed: fall back to mt_rand()
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $lastIndex);
            }

            $char = self::substr($possibleChars, $index, 1, $encoding);
            if ($char === false) {
                // extraction failed: draw again without counting this round
                continue;
            }

            $result .= $char;
            ++$picked;
        }

        return $result;
    }
2049
2050
    /**
2051
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2052
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2053
     *
2054
     * @return string
2055
     */
2056 1
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // random_int() throws when no suitable randomness source is available.
        // Fall back to mt_rand() in that case, as get_random_string() does, so
        // this method always returns a string instead of propagating the exception.
        try {
            $randInt = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $randInt = \mt_rand(0, \mt_getrandmax());
        }

        // Mix in whatever request-specific data is at hand plus the caller's extra entropy.
        $uniqueHelper = $randInt .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        $uniqueString = \uniqid($uniqueHelper, true);

        // md5 is used here only to get a fixed-length identifier.
        if ($md5) {
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2072
2073
    /**
2074
     * alias for "UTF8::string_has_bom()"
2075
     *
2076
     * @see        UTF8::string_has_bom()
2077
     *
2078
     * @param string $str
2079
     *
2080
     * @return bool
2081
     *
2082
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
2083
     */
2084 2
    public static function hasBom(string $str): bool
    {
        // Deprecated alias: the work is done by self::string_has_bom().
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2088
2089
    /**
2090
     * Returns true if the string contains a lower case char, false otherwise.
2091
     *
2092
     * @param string $str <p>The input string.</p>
2093
     *
2094
     * @return bool whether or not the string contains a lower case character
2095
     */
2096 47
    public static function has_lowercase(string $str): bool
    {
        // Pattern: anything, followed by one character of the POSIX class "lower".
        $pattern = '.*[[:lower:]]';

        return self::str_matches_pattern($str, $pattern);
    }
2100
2101
    /**
2102
     * Returns true if the string contains an upper case char, false otherwise.
2103
     *
2104
     * @param string $str <p>The input string.</p>
2105
     *
2106
     * @return bool whether or not the string contains an upper case character
2107
     */
2108 12
    public static function has_uppercase(string $str): bool
    {
        // Pattern: anything, followed by one character of the POSIX class "upper".
        $pattern = '.*[[:upper:]]';

        return self::str_matches_pattern($str, $pattern);
    }
2112
2113
    /**
2114
     * Converts a hexadecimal-value into an UTF-8 character.
2115
     *
2116
     * @param string $hexdec <p>The hexadecimal value.</p>
2117
     *
2118
     * @return false|string one single UTF-8 character
2119
     */
2120 4
    public static function hex_to_chr(string $hexdec)
    {
        // First turn the hexadecimal text into a number ...
        $codePoint = \hexdec($hexdec);

        // ... then let self::decimal_to_chr() build the character from it.
        return self::decimal_to_chr($codePoint);
    }
2124
2125
    /**
2126
     * Converts hexadecimal U+xxxx code point representation to integer.
2127
     *
2128
     * INFO: opposite to UTF8::int_to_hex()
2129
     *
2130
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
2131
     *
2132
     * @return false|int the code point, or false on failure
2133
     */
2134 2
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Accepted forms: "\uXXXX", "U+XXXX" or bare "XXXX", with 4 to 6 digits.
        // Only real hexadecimal digits are allowed. The previous character class
        // also let "g"-"z" through, so intval() parsed just the leading hex part
        // (or nothing) and returned a wrong code point instead of false.
        if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2149
2150
    /**
2151
     * alias for "UTF8::html_entity_decode()"
2152
     *
2153
     * @see UTF8::html_entity_decode()
2154
     *
2155
     * @param string $str
2156
     * @param int    $flags
2157
     * @param string $encoding
2158
     *
2159
     * @return string
2160
     */
2161 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Alias: all arguments are passed through to self::html_entity_decode().
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2165
2166
    /**
2167
     * Converts a UTF-8 string to a series of HTML numbered entities.
2168
     *
2169
     * INFO: opposite to UTF8::html_decode()
2170
     *
2171
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2172
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2173
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2174
     *
2175
     * @return string HTML numbered entities
2176
     */
2177 13
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Encode from U+0000 upwards, or from U+0080 when ASCII is to be kept.
            $startCode = 0x00;
            if ($keepAsciiChars === true) {
                $startCode = 0x80;
            }

            // A convmap is a flat list of [start, end, offset, mask] groups, so
            // its length must be a multiple of four. The former fifth element
            // was ignored on PHP 7 and is rejected with a ValueError on PHP 8.
            return \mb_encode_numericentity(
                $str,
                [$startCode, 0xfffff, 0, 0xfffff],
                $encoding
            );
        }

        //
        // fallback via vanilla php
        //

        // Encode the pieces returned by self::split() one at a time and glue them together.
        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::split($str)
            )
        );
    }
2219
2220
    /**
2221
     * UTF-8 version of html_entity_decode()
2222
     *
2223
     * The reason we are not using html_entity_decode() by itself is because
2224
     * while it is not technically correct to leave out the semicolon
2225
     * at the end of an entity most browsers will still interpret the entity
2226
     * correctly. html_entity_decode() does not convert entities without
2227
     * semicolons, so we are left with our own little solution here. Bummer.
2228
     *
2229
     * Convert all HTML entities to their applicable characters
2230
     *
2231
     * INFO: opposite to UTF8::html_encode()
2232
     *
2233
     * @see http://php.net/manual/en/function.html-entity-decode.php
2234
     *
2235
     * @param string $str      <p>
2236
     *                         The input string.
2237
     *                         </p>
2238
     * @param int    $flags    [optional] <p>
2239
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2240
     *                         which document type to use. If omitted (null), ENT_QUOTES | ENT_HTML5 is used.
2241
     *                         <table>
2242
     *                         Available <i>flags</i> constants
2243
     *                         <tr valign="top">
2244
     *                         <td>Constant Name</td>
2245
     *                         <td>Description</td>
2246
     *                         </tr>
2247
     *                         <tr valign="top">
2248
     *                         <td><b>ENT_COMPAT</b></td>
2249
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2250
     *                         </tr>
2251
     *                         <tr valign="top">
2252
     *                         <td><b>ENT_QUOTES</b></td>
2253
     *                         <td>Will convert both double and single quotes.</td>
2254
     *                         </tr>
2255
     *                         <tr valign="top">
2256
     *                         <td><b>ENT_NOQUOTES</b></td>
2257
     *                         <td>Will leave both double and single quotes unconverted.</td>
2258
     *                         </tr>
2259
     *                         <tr valign="top">
2260
     *                         <td><b>ENT_HTML401</b></td>
2261
     *                         <td>
2262
     *                         Handle code as HTML 4.01.
2263
     *                         </td>
2264
     *                         </tr>
2265
     *                         <tr valign="top">
2266
     *                         <td><b>ENT_XML1</b></td>
2267
     *                         <td>
2268
     *                         Handle code as XML 1.
2269
     *                         </td>
2270
     *                         </tr>
2271
     *                         <tr valign="top">
2272
     *                         <td><b>ENT_XHTML</b></td>
2273
     *                         <td>
2274
     *                         Handle code as XHTML.
2275
     *                         </td>
2276
     *                         </tr>
2277
     *                         <tr valign="top">
2278
     *                         <td><b>ENT_HTML5</b></td>
2279
     *                         <td>
2280
     *                         Handle code as HTML 5.
2281
     *                         </td>
2282
     *                         </tr>
2283
     *                         </table>
2284
     *                         </p>
2285
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2286
     *
2287
     * @return string the decoded string
2288
     */
2289 40
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Strings shorter than four bytes are returned unchanged.
        if (!isset($str[3])) { // examples: &; || &x;
            return $str;
        }

        // Fast path: nothing to decode without "&", or with neither "&#" nor ";".
        if (
            \strpos($str, '&') === false
            ||
            (
                \strpos($str, '&#') === false
                &&
                \strpos($str, ';') === false
            )
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // The support table must be populated BEFORE self::$SUPPORT['mbstring']
        // is read below; previously this check came after the first read.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly until a pass no longer changes the string, so that
        // multiply-encoded entities are resolved as well.
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                // A convmap is a flat list of [start, end, offset, mask] groups,
                // so its length must be a multiple of four. The former fifth
                // element was ignored on PHP 7 and raises a ValueError on PHP 8.
                $str = \mb_decode_numericentity(
                    $str,
                    [0x80, 0xfffff, 0, 0xfffff],
                    $encoding
                );
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        // always fallback via symfony polyfill
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                        // quote characters are left encoded at this stage
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            // decode numeric & UTF16 two byte entities
            // (the inner preg_replace() appends the missing ";" to numeric entities first)
            $str = \html_entity_decode(
                (string) \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
                $flags,
                $encoding
            );
        } while ($str_compare !== $str);

        return $str;
    }
2377
2378
    /**
2379
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2380
     *
2381
     * @param string $str
2382
     * @param string $encoding [optional] <p>Default: UTF-8</p>
2383
     *
2384
     * @return string
2385
     */
2386 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Flags passed on: ENT_QUOTES combined with ENT_SUBSTITUTE.
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2394
2395
    /**
2396
     * Remove empty html-tag.
2397
     *
2398
     * e.g.: <tag></tag>
2399
     *
2400
     * @param string $str
2401
     *
2402
     * @return string
2403
     */
2404 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag, optional whitespace only, then a closing tag.
        $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

        // preg_replace() yields null on error; the cast turns that into ''.
        $stripped = \preg_replace($emptyTagPattern, '', $str);

        return (string) $stripped;
    }
2412
2413
    /**
2414
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2415
     *
2416
     * @see http://php.net/manual/en/function.htmlentities.php
2417
     *
2418
     * @param string $str           <p>
2419
     *                              The input string.
2420
     *                              </p>
2421
     * @param int    $flags         [optional] <p>
2422
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2423
     *                              invalid code unit sequences and the used document type. The default is
2424
     *                              ENT_COMPAT | ENT_HTML401.
2425
     *                              <table>
2426
     *                              Available <i>flags</i> constants
2427
     *                              <tr valign="top">
2428
     *                              <td>Constant Name</td>
2429
     *                              <td>Description</td>
2430
     *                              </tr>
2431
     *                              <tr valign="top">
2432
     *                              <td><b>ENT_COMPAT</b></td>
2433
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2434
     *                              </tr>
2435
     *                              <tr valign="top">
2436
     *                              <td><b>ENT_QUOTES</b></td>
2437
     *                              <td>Will convert both double and single quotes.</td>
2438
     *                              </tr>
2439
     *                              <tr valign="top">
2440
     *                              <td><b>ENT_NOQUOTES</b></td>
2441
     *                              <td>Will leave both double and single quotes unconverted.</td>
2442
     *                              </tr>
2443
     *                              <tr valign="top">
2444
     *                              <td><b>ENT_IGNORE</b></td>
2445
     *                              <td>
2446
     *                              Silently discard invalid code unit sequences instead of returning
2447
     *                              an empty string. Using this flag is discouraged as it
2448
     *                              may have security implications.
2449
     *                              </td>
2450
     *                              </tr>
2451
     *                              <tr valign="top">
2452
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2453
     *                              <td>
2454
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2455
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2456
     *                              </td>
2457
     *                              </tr>
2458
     *                              <tr valign="top">
2459
     *                              <td><b>ENT_DISALLOWED</b></td>
2460
     *                              <td>
2461
     *                              Replace invalid code points for the given document type with a
2462
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2463
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2464
     *                              instance, to ensure the well-formedness of XML documents with
2465
     *                              embedded external content.
2466
     *                              </td>
2467
     *                              </tr>
2468
     *                              <tr valign="top">
2469
     *                              <td><b>ENT_HTML401</b></td>
2470
     *                              <td>
2471
     *                              Handle code as HTML 4.01.
2472
     *                              </td>
2473
     *                              </tr>
2474
     *                              <tr valign="top">
2475
     *                              <td><b>ENT_XML1</b></td>
2476
     *                              <td>
2477
     *                              Handle code as XML 1.
2478
     *                              </td>
2479
     *                              </tr>
2480
     *                              <tr valign="top">
2481
     *                              <td><b>ENT_XHTML</b></td>
2482
     *                              <td>
2483
     *                              Handle code as XHTML.
2484
     *                              </td>
2485
     *                              </tr>
2486
     *                              <tr valign="top">
2487
     *                              <td><b>ENT_HTML5</b></td>
2488
     *                              <td>
2489
     *                              Handle code as HTML 5.
2490
     *                              </td>
2491
     *                              </tr>
2492
     *                              </table>
2493
     *                              </p>
2494
     * @param string $encoding      [optional] <p>
2495
     *                              Like <b>htmlspecialchars</b>,
2496
     *                              <b>htmlentities</b> takes an optional third argument
2497
     *                              <i>encoding</i> which defines encoding used in
2498
     *                              conversion.
2499
     *                              Although this argument is technically optional, you are highly
2500
     *                              encouraged to specify the correct value for your code.
2501
     *                              </p>
2502
     * @param bool   $double_encode [optional] <p>
2503
     *                              When <i>double_encode</i> is turned off PHP will not
2504
     *                              encode existing html entities. The default is to convert everything.
2505
     *                              </p>
2506
     *
2507
     * @return string
2508
     *                <p>
2509
     *                The encoded string.
2510
     *                <br><br>
2511
     *                If the input <i>string</i> contains an invalid code unit
2512
     *                sequence within the given <i>encoding</i> an empty string
2513
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2514
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2515
     *                </p>
2516
     */
2517 9
    public static function htmlentities(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        // 'UTF-8' and 'CP850' are used as given; every other name is normalized first.
        $isPassThroughEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isPassThroughEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * The native htmlentities() does not turn a backslash into an html entity,
         * so every backslash is replaced by its numeric entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
2537
2538
    /**
2539
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2540
     *
2541
     * INFO: Take a look at "UTF8::htmlentities()"
2542
     *
2543
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2544
     *
2545
     * @param string $str           <p>
2546
     *                              The string being converted.
2547
     *                              </p>
2548
     * @param int    $flags         [optional] <p>
2549
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2550
     *                              invalid code unit sequences and the used document type. The default is
2551
     *                              ENT_COMPAT | ENT_HTML401.
2552
     *                              <table>
2553
     *                              Available <i>flags</i> constants
2554
     *                              <tr valign="top">
2555
     *                              <td>Constant Name</td>
2556
     *                              <td>Description</td>
2557
     *                              </tr>
2558
     *                              <tr valign="top">
2559
     *                              <td><b>ENT_COMPAT</b></td>
2560
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2561
     *                              </tr>
2562
     *                              <tr valign="top">
2563
     *                              <td><b>ENT_QUOTES</b></td>
2564
     *                              <td>Will convert both double and single quotes.</td>
2565
     *                              </tr>
2566
     *                              <tr valign="top">
2567
     *                              <td><b>ENT_NOQUOTES</b></td>
2568
     *                              <td>Will leave both double and single quotes unconverted.</td>
2569
     *                              </tr>
2570
     *                              <tr valign="top">
2571
     *                              <td><b>ENT_IGNORE</b></td>
2572
     *                              <td>
2573
     *                              Silently discard invalid code unit sequences instead of returning
2574
     *                              an empty string. Using this flag is discouraged as it
2575
     *                              may have security implications.
2576
     *                              </td>
2577
     *                              </tr>
2578
     *                              <tr valign="top">
2579
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2580
     *                              <td>
2581
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2582
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2583
     *                              </td>
2584
     *                              </tr>
2585
     *                              <tr valign="top">
2586
     *                              <td><b>ENT_DISALLOWED</b></td>
2587
     *                              <td>
2588
     *                              Replace invalid code points for the given document type with a
2589
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2590
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2591
     *                              instance, to ensure the well-formedness of XML documents with
2592
     *                              embedded external content.
2593
     *                              </td>
2594
     *                              </tr>
2595
     *                              <tr valign="top">
2596
     *                              <td><b>ENT_HTML401</b></td>
2597
     *                              <td>
2598
     *                              Handle code as HTML 4.01.
2599
     *                              </td>
2600
     *                              </tr>
2601
     *                              <tr valign="top">
2602
     *                              <td><b>ENT_XML1</b></td>
2603
     *                              <td>
2604
     *                              Handle code as XML 1.
2605
     *                              </td>
2606
     *                              </tr>
2607
     *                              <tr valign="top">
2608
     *                              <td><b>ENT_XHTML</b></td>
2609
     *                              <td>
2610
     *                              Handle code as XHTML.
2611
     *                              </td>
2612
     *                              </tr>
2613
     *                              <tr valign="top">
2614
     *                              <td><b>ENT_HTML5</b></td>
2615
     *                              <td>
2616
     *                              Handle code as HTML 5.
2617
     *                              </td>
2618
     *                              </tr>
2619
     *                              </table>
2620
     *                              </p>
2621
     * @param string $encoding      [optional] <p>
2622
     *                              Defines encoding used in conversion.
2623
     *                              </p>
2624
     *                              <p>
2625
     *                              For the purposes of this function, the encodings
2626
     *                              ISO-8859-1, ISO-8859-15,
2627
     *                              UTF-8, cp866,
2628
     *                              cp1251, cp1252, and
2629
     *                              KOI8-R are effectively equivalent, provided the
2630
     *                              <i>string</i> itself is valid for the encoding, as
2631
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2632
     *                              the same positions in all of these encodings.
2633
     *                              </p>
2634
     * @param bool   $double_encode [optional] <p>
2635
     *                              When <i>double_encode</i> is turned off PHP will not
2636
     *                              encode existing html entities, the default is to convert everything.
2637
     *                              </p>
2638
     *
2639
     * @return string the converted string.
2640
     *                </p>
2641
     *                <p>
2642
     *                If the input <i>string</i> contains an invalid code unit
2643
     *                sequence within the given <i>encoding</i> an empty string
2644
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2645
     *                <b>ENT_SUBSTITUTE</b> flags are set
2646
     */
2647 8
    public static function htmlspecialchars(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        // 'UTF-8' and 'CP850' are used as given; every other name is normalized first.
        $isPassThroughEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isPassThroughEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2655
2656
    /**
2657
     * Checks whether iconv is available on the server.
2658
     *
2659
     * @return bool
2660
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2661
     */
2662
    public static function iconv_loaded(): bool
    {
        // Asks the engine whether the "iconv" extension is loaded.
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2666
2667
    /**
2668
     * alias for "UTF8::decimal_to_chr()"
2669
     *
2670
     * @see UTF8::decimal_to_chr()
2671
     *
2672
     * @param mixed $int
2673
     *
2674
     * @return string
2675
     */
2676 4
    public static function int_to_chr($int): string
    {
        // Pure alias: the argument is handed to decimal_to_chr() untouched.
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2680
2681
    /**
2682
     * Converts Integer to hexadecimal U+xxxx code point representation.
2683
     *
2684
     * INFO: opposite to UTF8::hex_to_int()
2685
     *
2686
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2687
     * @param string $pfix [optional]
2688
     *
2689
     * @return string the code point, or empty string on failure
2690
     */
2691 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Left-pad with zeros to at least four hex digits; longer values are kept as they are.
        $hexDigits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $hexDigits;
    }
2699
2700
    /**
2701
     * Checks whether intl-char is available on the server.
2702
     *
2703
     * @return bool
2704
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2705
     */
2706
    public static function intlChar_loaded(): bool
    {
        // Availability is detected through the presence of the "IntlChar" class.
        $isAvailable = \class_exists('IntlChar');

        return $isAvailable;
    }
2710
2711
    /**
2712
     * Checks whether intl is available on the server.
2713
     *
2714
     * @return bool
2715
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2716
     */
2717 5
    public static function intl_loaded(): bool
    {
        // Asks the engine whether the "intl" extension is loaded.
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2721
2722
    /**
2723
     * alias for "UTF8::is_ascii()"
2724
     *
2725
     * @see        UTF8::is_ascii()
2726
     *
2727
     * @param string $str
2728
     *
2729
     * @return bool
2730
     *
2731
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2732
     */
2733 2
    public static function isAscii(string $str): bool
    {
        // Deprecated camelCase alias; the result comes straight from is_ascii().
        $result = self::is_ascii($str);

        return $result;
    }
2737
2738
    /**
2739
     * alias for "UTF8::is_base64()"
2740
     *
2741
     * @see        UTF8::is_base64()
2742
     *
2743
     * @param string $str
2744
     *
2745
     * @return bool
2746
     *
2747
     * @deprecated <p>use "UTF8::is_base64()"</p>
2748
     */
2749 2
    public static function isBase64($str): bool
    {
        // Deprecated camelCase alias; the result comes straight from is_base64().
        $result = self::is_base64($str);

        return $result;
    }
2753
2754
    /**
2755
     * alias for "UTF8::is_binary()"
2756
     *
2757
     * @see        UTF8::is_binary()
2758
     *
2759
     * @param mixed $str
2760
     * @param bool  $strict
2761
     *
2762
     * @return bool
2763
     *
2764
     * @deprecated <p>use "UTF8::is_binary()"</p>
2765
     */
2766 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated camelCase alias; both arguments are forwarded unchanged to is_binary().
        $result = self::is_binary($str, $strict);

        return $result;
    }
2770
2771
    /**
2772
     * alias for "UTF8::is_bom()"
2773
     *
2774
     * @see        UTF8::is_bom()
2775
     *
2776
     * @param string $utf8_chr
2777
     *
2778
     * @return bool
2779
     *
2780
     * @deprecated <p>use "UTF8::is_bom()"</p>
2781
     */
2782 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated camelCase alias; the result comes straight from is_bom().
        $result = self::is_bom($utf8_chr);

        return $result;
    }
2786
2787
    /**
2788
     * alias for "UTF8::is_html()"
2789
     *
2790
     * @see        UTF8::is_html()
2791
     *
2792
     * @param string $str
2793
     *
2794
     * @return bool
2795
     *
2796
     * @deprecated <p>use "UTF8::is_html()"</p>
2797
     */
2798 2
    public static function isHtml(string $str): bool
    {
        // Deprecated camelCase alias; the result comes straight from is_html().
        $result = self::is_html($str);

        return $result;
    }
2802
2803
    /**
2804
     * alias for "UTF8::is_json()"
2805
     *
2806
     * @see        UTF8::is_json()
2807
     *
2808
     * @param string $str
2809
     *
2810
     * @return bool
2811
     *
2812
     * @deprecated <p>use "UTF8::is_json()"</p>
2813
     */
2814
    public static function isJson(string $str): bool
    {
        // Deprecated camelCase alias; is_json() is called with its default second argument.
        $result = self::is_json($str);

        return $result;
    }
2818
2819
    /**
2820
     * alias for "UTF8::is_utf16()"
2821
     *
2822
     * @see        UTF8::is_utf16()
2823
     *
2824
     * @param mixed $str
2825
     *
2826
     * @return false|int
2827
     *                   <strong>false</strong> if it's not UTF-16,<br>
2828
     *                   <strong>1</strong> for UTF-16LE,<br>
2829
     *                   <strong>2</strong> for UTF-16BE
2830
     *
2831
     * @deprecated <p>use "UTF8::is_utf16()"</p>
2832
     */
2833 2
    public static function isUtf16($str)
    {
        // Deprecated camelCase alias; is_utf16() is called with its default second argument.
        $result = self::is_utf16($str);

        return $result;
    }
2837
2838
    /**
2839
     * alias for "UTF8::is_utf32()"
2840
     *
2841
     * @see        UTF8::is_utf32()
2842
     *
2843
     * @param mixed $str
2844
     *
2845
     * @return false|int
2846
     *                   <strong>false</strong> if it's not UTF-32,
2847
     *                   <strong>1</strong> for UTF-32LE,
2848
     *                   <strong>2</strong> for UTF-32BE
2849
     *
2850
     * @deprecated <p>use "UTF8::is_utf32()"</p>
2851
     */
2852 2
    public static function isUtf32($str)
    {
        // Deprecated camelCase alias; the result comes straight from is_utf32().
        $result = self::is_utf32($str);

        return $result;
    }
2856
2857
    /**
2858
     * alias for "UTF8::is_utf8()"
2859
     *
2860
     * @see        UTF8::is_utf8()
2861
     *
2862
     * @param string $str
2863
     * @param bool   $strict
2864
     *
2865
     * @return bool
2866
     *
2867
     * @deprecated <p>use "UTF8::is_utf8()"</p>
2868
     */
2869 17
    public static function isUtf8($str, $strict = false): bool
    {
        // Deprecated camelCase alias; both arguments are forwarded unchanged to is_utf8().
        $result = self::is_utf8($str, $strict);

        return $result;
    }
2873
2874
    /**
2875
     * Returns true if the string contains only alphabetic chars, false otherwise.
2876
     *
2877
     * @param string $str
2878
     *
2879
     * @return bool
2880
     *              Whether or not $str contains only alphabetic chars
2881
     */
2882 10
    public static function is_alpha(string $str): bool
    {
        // Anchored pattern: zero or more chars of the POSIX "alpha" class and nothing else.
        $alphaOnlyPattern = '^[[:alpha:]]*$';

        return self::str_matches_pattern($str, $alphaOnlyPattern);
    }
2886
2887
    /**
2888
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2889
     *
2890
     * @param string $str
2891
     *
2892
     * @return bool
2893
     *              Whether or not $str contains only alphanumeric chars
2894
     */
2895 13
    public static function is_alphanumeric(string $str): bool
    {
        // Anchored pattern: zero or more chars of the POSIX "alnum" class and nothing else.
        $alnumOnlyPattern = '^[[:alnum:]]*$';

        return self::str_matches_pattern($str, $alnumOnlyPattern);
    }
2899
2900
    /**
2901
     * Checks if a string is 7 bit ASCII.
2902
     *
2903
     * @param string $str <p>The string to check.</p>
2904
     *
2905
     * @return bool
2906
     *              <strong>true</strong> if it is ASCII<br>
2907
     *              <strong>false</strong> otherwise
2908
     */
2909 204
    public static function is_ascii(string $str): bool
    {
        // The empty string short-circuits to true.
        if ($str === '') {
            return true;
        }

        // Accepted bytes: 0x09, 0x0A, 0x0D, 0x10, 0x13 and 0x20-0x7E; anything else fails.
        // NOTE(review): \x10 and \x13 look like decimal 10/13 written as hex (LF/CR are
        // already listed as \x0A/\x0D) - confirm before changing, callers may rely on it.
        $hasDisallowedByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

        return !$hasDisallowedByte;
    }
2917
2918
    /**
2919
     * Returns true if the string is base64 encoded, false otherwise.
2920
     *
2921
     * @param mixed|string $str                <p>The input string.</p>
2922
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
2923
     *
2924
     * @return bool whether or not $str is base64 encoded
2925
     */
2926 16
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        // An empty string is rejected unless the caller explicitly allows it.
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        // Strict decode first ...
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // ... then require that re-encoding reproduces the input exactly.
        return \base64_encode($decoded) === $str;
    }
2943
2944
    /**
2945
     * Check if the input is binary... (this looks like a hack).
2946
     *
2947
     * @param mixed $input
2948
     * @param bool  $strict
2949
     *
2950
     * @return bool
2951
     */
2952 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string consisting solely of "0" and "1" is reported as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        // The detected file type may already say "binary".
        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // More than 25% NUL bytes is reported as binary.
        $byteLength = self::strlen_in_byte($input);
        if ($byteLength) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            $nullByteCount = self::substr_count_in_byte($input, "\x0", 0, $byteLength);
            if ($nullByteCount !== false && ($nullByteCount / $byteLength) > 0.25) {
                return true;
            }
        }

        // Without strict mode the cheap heuristics above are the final word.
        if ($strict !== true) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        // Strict mode: let fileinfo name the MIME encoding of the buffer.
        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mimeEncoding === 'binary';
    }
3002
3003
    /**
3004
     * Check if the file is binary.
3005
     *
3006
     * @param string $file
3007
     *
3008
     * @return bool
3009
     */
3010 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // Nothing could be read, so the file is not reported as binary.
            return false;
        }

        // Only the first 512 bytes are inspected.
        $block = \fread($handle, 512);
        \fclose($handle);

        if ($block === '') {
            return false;
        }

        return self::is_binary($block, true);
    }
3027
3028
    /**
3029
     * Returns true if the string contains only whitespace chars, false otherwise.
3030
     *
3031
     * @param string $str
3032
     *
3033
     * @return bool
3034
     *              Whether or not $str contains only whitespace characters
3035
     */
3036 15
    public static function is_blank(string $str): bool
    {
        // Anchored pattern: zero or more chars of the POSIX "space" class and nothing else.
        $whitespaceOnlyPattern = '^[[:space:]]*$';

        return self::str_matches_pattern($str, $whitespaceOnlyPattern);
    }
3040
3041
    /**
3042
     * Checks if the given string is equal to any "Byte Order Mark".
3043
     *
3044
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3045
     *
3046
     * @param string $str <p>The input string.</p>
3047
     *
3048
     * @return bool
3049
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
3050
     */
3051 2
    public static function is_bom($str): bool
    {
        // The BOM byte sequences are the keys of self::$BOM; the values are not needed here.
        // A strict key lookup replaces the former by-reference foreach, which turned the
        // elements of the shared static array into references for no benefit.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3062
3063
    /**
3064
     * Determine whether the string is considered to be empty.
3065
     *
3066
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3067
     * empty() does not generate a warning if the variable does not exist.
3068
     *
3069
     * @param mixed $str
3070
     *
3071
     * @return bool whether or not $str is empty()
3072
     */
3073
    public static function is_empty($str): bool
    {
        // For a parameter that is always defined, empty() is the same as "converts to false".
        $isFalsy = ((bool) $str === false);

        return $isFalsy;
    }
3077
3078
    /**
3079
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3080
     *
3081
     * @param string $str
3082
     *
3083
     * @return bool
3084
     *              Whether or not $str contains only hexadecimal chars
3085
     */
3086 13
    public static function is_hexadecimal(string $str): bool
    {
        // Anchored pattern: zero or more chars of the POSIX "xdigit" class and nothing else.
        $hexOnlyPattern = '^[[:xdigit:]]*$';

        return self::str_matches_pattern($str, $hexOnlyPattern);
    }
3090
3091
    /**
3092
     * Check if the string contains any html-tags <lall>.
3093
     *
3094
     * @param string $str <p>The input string.</p>
3095
     *
3096
     * @return bool
3097
     */
3098 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // Looks for one opening, closing or self-closing tag, optionally carrying attributes.
        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

        // preg_match() returns 1 only when a tag was found (0 = no match, false = error).
        return \preg_match($tagPattern, $str) === 1;
    }
3111
3112
    /**
3113
     * Try to check if "$str" is a JSON string.
3114
     *
3115
     * @param string $str                              <p>The input string.</p>
3116
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
3117
     *
3118
     * @return bool
3119 22
     */
3120
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only tolerated when the input itself spells "null" (any letter case).
        $isUnexpectedNull = ($decoded === null && \strtoupper($str) !== 'NULL');
        if ($isUnexpectedNull) {
            return false;
        }

        // Optionally reject anything that did not decode to an array or an object.
        $isArrayOrObject = (\is_object($decoded) || \is_array($decoded));
        if ($onlyArrayOrObjectResultsAreValid === true && !$isArrayOrObject) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3152 8
3153
    /**
3154
     * @param string $str
3155
     *
3156
     * @return bool
3157
     */
3158
    public static function is_lowercase(string $str): bool
    {
        // Anchored pattern: zero or more chars of the POSIX "lower" class and nothing else.
        $lowerOnlyPattern = '^[[:lower:]]*$';

        return self::str_matches_pattern($str, $lowerOnlyPattern);
    }
3162 7
3163
    /**
3164 7
     * Returns true if the string is serialized, false otherwise.
3165 1
     *
3166
     * @param string $str
3167
     *
3168
     * @return bool whether or not $str is serialized
3169
     */
3170 6
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // 'b:0;' unserializes to false, which cannot be told apart from a failed
        // unserialize(), so it is recognized up front.
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: $str may come from an untrusted source. 'allowed_classes' => false keeps
        // unserialize() from instantiating arbitrary classes while probing; serialized objects
        // still yield a non-false value, so the result of this check is unchanged.
        // The silence operator hides the notice/warning emitted for non-serialized input.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3182
3183
    /**
3184 8
     * Returns true if the string contains only upper case chars, false
3185
     * otherwise.
3186 8
     *
3187
     * @param string $str <p>The input string.</p>
3188
     *
3189
     * @return bool
3190
     *              Whether or not $str contains only upper case characters
3191
     */
3192
    public static function is_uppercase(string $str): bool
    {
        // Anchored pattern: zero or more chars of the POSIX "upper" class and nothing else.
        $upperOnlyPattern = '^[[:upper:]]*$';

        return self::str_matches_pattern($str, $upperOnlyPattern);
    }
3196
3197
    /**
3198
     * Check if the string is UTF-16.
3199
     *
3200 21
     * @param mixed $str                   <p>The input string.</p>
3201
     * @param bool  $checkIfStringIsBinary
3202
     *
3203 21
     * @return false|int
3204 21
     *                   <strong>false</strong> if it's not UTF-16,<br>
3205
     *                   <strong>1</strong> for UTF-16LE,<br>
3206
     *                   <strong>2</strong> for UTF-16BE
3207 21
     */
3208
    public static function is_utf16($str, $checkIfStringIsBinary = true)
3209 21
    {
3210
        // init
3211 2
        $str = (string) $str;
3212
        $strChars = [];
3213
3214 21
        if (
3215 2
            $checkIfStringIsBinary === true
3216
            &&
3217
            self::is_binary($str, true) === false
3218 21
        ) {
3219
            return false;
3220 21
        }
3221 21
3222 21
        if (self::$SUPPORT['mbstring'] === false) {
3223 15
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
3224 15
        }
3225 15
3226 15
        $str = self::remove_bom($str);
3227 15
3228
        $maybeUTF16LE = 0;
3229 15
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
3230 15
        if ($test) {
3231 15
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
3232
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
3233
            if ($test3 === $test) {
3234 15
                if (\count($strChars) === 0) {
3235
                    $strChars = self::count_chars($str, true);
3236
                }
3237
                foreach (self::count_chars($test3, true) as $test3char => &$test3charEmpty) {
0 ignored issues
show
Bug introduced by
The expression self::count_chars($test3, true) cannot be used as a reference.

Let's assume that you have the following foreach statement:

foreach ($array as &$itemValue) { }

$itemValue is assigned by reference. This is possible because the expression (in the example $array) can be used as a reference target.

However, if we were to replace $array with something different like the result of a function call as in

foreach (getArray() as &$itemValue) { }

then assigning by reference is not possible anymore as there is no target that could be modified.

Available Fixes

1. Do not assign by reference
foreach (getArray() as $itemValue) { }
2. Assign to a local variable first
$array = getArray();
foreach ($array as &$itemValue) {}
3. Return a reference
function &getArray() { $array = array(); return $array; }

foreach (getArray() as &$itemValue) { }
Loading history...
3238 21
                    if (\in_array($test3char, $strChars, true) === true) {
3239 21
                        ++$maybeUTF16LE;
3240 21
                    }
3241 15
                }
3242 15
                unset($test3charEmpty);
3243 15
            }
3244 15
        }
3245 7
3246
        $maybeUTF16BE = 0;
3247 15
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
3248 15
        if ($test) {
3249 15
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
3250
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
3251
            if ($test3 === $test) {
3252 15
                if (\count($strChars) === 0) {
3253
                    $strChars = self::count_chars($str, true);
3254
                }
3255
                foreach (self::count_chars($test3, true) as $test3char => &$test3charEmpty) {
0 ignored issues
show
Bug introduced by
The expression self::count_chars($test3, true) cannot be used as a reference.

Let?s assume that you have the following foreach statement:

foreach ($array as &$itemValue) { }

$itemValue is assigned by reference. This is possible because the expression (in the example $array) can be used as a reference target.

However, if we were to replace $array with something different like the result of a function call as in

foreach (getArray() as &$itemValue) { }

then assigning by reference is not possible anymore as there is no target that could be modified.

Available Fixes

1. Do not assign by reference
foreach (getArray() as $itemValue) { }
2. Assign to a local variable first
$array = getArray();
foreach ($array as &$itemValue) {}
3. Return a reference
function &getArray() { $array = array(); return $array; }

foreach (getArray() as &$itemValue) { }
Loading history...
3256 21
                    if (\in_array($test3char, $strChars, true) === true) {
3257 6
                        ++$maybeUTF16BE;
3258 4
                    }
3259
                }
3260
                unset($test3charEmpty);
3261 6
            }
3262
        }
3263
3264 17
        if ($maybeUTF16BE !== $maybeUTF16LE) {
3265
            if ($maybeUTF16LE > $maybeUTF16BE) {
3266
                return 1;
3267
            }
3268
3269
            return 2;
3270
        }
3271
3272
        return false;
3273
    }
3274
3275
    /**
     * Check if the string is UTF-32.
     *
     * Both byte orders are probed by converting the input to UTF-8 and
     * round-tripping it; each stable round-trip is scored and the byte order
     * with the higher score wins.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If true, the input is first passed to
     *                                     "UTF8::is_binary()" and rejected when that returns false.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // One score per byte order; probed LE first, then BE.
        $scores = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $scores[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // Round-trip the converted text; only a stable round-trip is scored.
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // Computed lazily and shared by both probes.
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // Only the keys are needed, so iterate them by value instead of
            // binding a reference to a temporary function result.
            // NOTE(review): in_array() searches the *values* of $strChars; if
            // count_chars() returns "char => count" this compares characters
            // against counts - confirm that this is the intended heuristic.
            foreach (\array_keys(self::count_chars($test3, true)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        // A tie (including 0/0) means "not UTF-32"; otherwise the higher score wins.
        if ($scores['UTF-32BE'] !== $scores['UTF-32LE']) {
            return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
        }

        return false;
    }
3352
3353
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * Arrays are checked element by element (recursively); the result is true only
     * if every element is valid. The empty string is considered valid.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Iterate by value: nothing is modified, so no reference is needed
            // (and none is left dangling afterwards).
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Normalise scalars once, so that the byte-wise access below and
        // preg_match() (strict_types) always operate on a real string.
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // Fast path, only usable when PCRE can handle the /u modifier.
        // (The previous "!== true" check was inverted: it used /u exactly when
        // it was reported as unavailable.)
        // NOTE(review): assumes pcre_utf8_support() returns true when /u works - confirm.
        if (self::pcre_utf8_support() === true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        // Fallback: byte-wise UTF-8 state machine.
        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = self::strlen_in_byte($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } else {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if ((0xC0 & $in) === 0x80) {
                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;
                    // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                    // Unicode code point to be output.
                    if (--$mState === 0) {
                        // Check for illegal sequences and code points.
                        //
                        // From Unicode 3.1, non-shortest form is illegal
                        if (
                            ($mBytes === 2 && $mUcs4 < 0x0080)
                            ||
                            ($mBytes === 3 && $mUcs4 < 0x0800)
                            ||
                            ($mBytes === 4 && $mUcs4 < 0x10000)
                            ||
                            ($mBytes > 4)
                            ||
                            // From Unicode 3.2, surrogate characters are illegal.
                            (($mUcs4 & 0xFFFFF800) === 0xD800)
                            ||
                            // Code points outside the Unicode range are illegal.
                            ($mUcs4 > 0x10FFFF)
                        ) {
                            return false;
                        }
                        // initialize UTF8 cache
                        $mState = 0;
                        $mUcs4 = 0;
                        $mBytes = 1;
                    }
                } else {
                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
                    // Incomplete multi-octet sequence.
                    return false;
                }
            }
        }

        return true;
    }
3512
3513
    /**
     * Decodes a JSON string (wrapper around the native "json_decode()").
     *
     * The input is passed through "UTF8::filter()" before it is handed to the
     * native decoder.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>Forwarded to the native function: when <b>TRUE</b>,
     *                        returned objects will be converted into associative arrays.</p>
     * @param int    $depth   [optional] <p>Forwarded to the native function: user specified
     *                        recursion depth.</p>
     * @param int    $options [optional] <p>Forwarded to the native function: bitmask of JSON
     *                        decode options.</p>
     *
     * @throws \RuntimeException if the json support flag is false (ext-json not installed)
     *
     * @return mixed
     *               whatever the native "json_decode()" returns for the filtered input
     */
    public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
    {
        $json = self::filter($json);

        // Make sure the support flags have been initialised before reading them.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3563
3564
    /**
     * Returns the JSON representation of a value (wrapper around the native "json_encode()").
     *
     * The value is passed through "UTF8::filter()" before it is handed to the
     * native encoder.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded.</p>
     * @param int   $options [optional] <p>Forwarded to the native function: bitmask of
     *                       <b>JSON_*</b> encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>).</p>
     * @param int   $depth   [optional] <p>Forwarded to the native function: the maximum depth.</p>
     *
     * @throws \RuntimeException if the json support flag is false (ext-json not installed)
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $value = self::filter($value);

        // Make sure the support flags have been initialised before reading them.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3617
3618
    /**
     * Reports whether the native JSON functions can be used.
     *
     * The check is done by probing for the "json_decode" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3628
3629
    /**
     * Lowercases the first character of a string and leaves the rest untouched.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // Everything after the first character is taken over as-is.
        $tail = (string) self::substr($str, 1, null, $encoding, $cleanUtf8);

        // Only the first character is run through strtolower().
        $firstChar = (string) self::substr($str, 0, 1, $encoding, $cleanUtf8);
        $head = self::strtolower($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $head . $tail;
    }
3654
3655
    /**
     * Alias of "UTF8::lcfirst()": forwards all arguments unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3677
3678
    /**
     * Lowercase the first character of every word in the string.
     *
     * The string is split via "UTF8::str_to_words()", every non-excluded word is
     * passed through "UTF8::lcfirst()" and the parts are glued back together.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that must be left untouched
     *                                           (compared strictly against each word).</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
     *                                           start a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a loose "!$str" check would also
        // swallow the valid input "0" and return an empty string for it.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = $exceptions !== [];

        // Write back by index instead of iterating by reference, so that no
        // dangling reference to the last element is left behind.
        foreach ($words as $index => $word) {
            // Falsy parts ('' / '0') have nothing to lowercase.
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
3724
3725
    /**
     * Alias of "UTF8::lcfirst()": forwards all arguments unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3747
3748 22
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * Without a character list, leading characters of the Unicode categories
     * "Z" (separators) and "C" (other / control) are removed.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars <p>Optional characters to be stripped; INF (the default) or an empty
     *                      value means "use the default character classes". The string "0" is
     *                      treated as a regular character list.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is falsy in PHP but a perfectly valid character list, so it must
        // not fall into the "no characters given" branch.
        $useDefaultClasses = $chars === \INF || (!$chars && $chars !== '0');

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultClasses) {
            $pattern = "^[\pZ\pC]+";
        } else {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2.
            $pattern = "^[{$chars}]+";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3772
3773
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into a single string first.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) > 0) {
            return self::chr(\max($codepoints));
        }

        return null;
    }
3795
3796
    /**
     * Determines the largest per-character byte size reported by
     * "UTF8::chr_size_list()" for the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte lengths of the given chars, 0 if the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
3813
3814 27
    /**
     * Checks whether mbstring is available on the server.
     *
     * Side effect: when the extension is loaded, the mbstring internal
     * encoding is switched to UTF-8.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        if (\extension_loaded('mbstring') !== true) {
            return false;
        }

        \mb_internal_encoding('UTF-8');

        return true;
    }
3829
3830
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into a single string first.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null if no code points were found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) > 0) {
            return self::chr(\min($codepoints));
        }

        return null;
    }
3852
3853
    /**
     * Alias of "UTF8::normalize_encoding()": forwards both arguments unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
3869
3870
    /**
3871
     * Normalize the encoding-"name" input.
3872 354
     *
3873
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3874
     * @param mixed $fallback <p>e.g.: UTF-8</p>
3875 354
     *
3876
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
3877
     */
3878 354
    public static function normalize_encoding($encoding, $fallback = '')
3879
    {
3880 50
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];
3881
3882 354
        // init
3883
        $encoding = (string) $encoding;
3884 309
3885
        if (
3886
            !$encoding
3887
            ||
3888 49
            $encoding === '1' // only a fallback, for non "strict_types" usage ...
3889
            ||
3890 49
            $encoding === '0' // only a fallback, for non "strict_types" usage ...
3891
        ) {
3892 22
            return $fallback;
3893
        }
3894
3895
        if (
3896 42
            $encoding === 'UTF-8'
3897
            ||
3898 42
            $encoding === 'UTF8'
3899
        ) {
3900
            return 'UTF-8';
3901
        }
3902
3903
        if (
3904 42
            $encoding === '8BIT'
3905
            ||
3906 42
            $encoding === 'BINARY'
3907
        ) {
3908 2
            return 'CP850';
3909
        }
3910
3911 42
        if (
3912 40
            $encoding === 'HTML'
3913
            ||
3914
            $encoding === 'HTML-ENTITIES'
3915 6
        ) {
3916 1
            return 'HTML-ENTITIES';
3917
        }
3918
3919 6
        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
3920 4
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
3921
        }
3922 4
3923
        if (self::$ENCODINGS === null) {
3924
            self::$ENCODINGS = self::getData('encodings');
3925 5
        }
3926 5
3927 5
        if (\in_array($encoding, self::$ENCODINGS, true)) {
3928
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;
3929
3930 5
            return $encoding;
3931
        }
3932
3933
        $encodingOrig = $encoding;
3934
        $encoding = \strtoupper($encoding);
3935
        $encodingUpperHelper = (string) \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
3936
3937
        $equivalences = [
3938
            'ISO8859'     => 'ISO-8859-1',
3939
            'ISO88591'    => 'ISO-8859-1',
3940
            'ISO'         => 'ISO-8859-1',
3941
            'LATIN'       => 'ISO-8859-1',
3942
            'LATIN1'      => 'ISO-8859-1', // Western European
3943
            'ISO88592'    => 'ISO-8859-2',
3944
            'LATIN2'      => 'ISO-8859-2', // Central European
3945
            'ISO88593'    => 'ISO-8859-3',
3946
            'LATIN3'      => 'ISO-8859-3', // Southern European
3947
            'ISO88594'    => 'ISO-8859-4',
3948
            'LATIN4'      => 'ISO-8859-4', // Northern European
3949
            'ISO88595'    => 'ISO-8859-5',
3950
            'ISO88596'    => 'ISO-8859-6', // Greek
3951
            'ISO88597'    => 'ISO-8859-7',
3952
            'ISO88598'    => 'ISO-8859-8', // Hebrew
3953
            'ISO88599'    => 'ISO-8859-9',
3954
            'LATIN5'      => 'ISO-8859-9', // Turkish
3955
            'ISO885911'   => 'ISO-8859-11',
3956
            'TIS620'      => 'ISO-8859-11', // Thai
3957
            'ISO885910'   => 'ISO-8859-10',
3958
            'LATIN6'      => 'ISO-8859-10', // Nordic
3959
            'ISO885913'   => 'ISO-8859-13',
3960
            'LATIN7'      => 'ISO-8859-13', // Baltic
3961
            'ISO885914'   => 'ISO-8859-14',
3962
            'LATIN8'      => 'ISO-8859-14', // Celtic
3963
            'ISO885915'   => 'ISO-8859-15',
3964
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
3965
            'ISO885916'   => 'ISO-8859-16',
3966
            'LATIN10'     => 'ISO-8859-16', // Southeast European
3967
            'CP1250'      => 'WINDOWS-1250',
3968
            'WIN1250'     => 'WINDOWS-1250',
3969
            'WINDOWS1250' => 'WINDOWS-1250',
3970
            'CP1251'      => 'WINDOWS-1251',
3971
            'WIN1251'     => 'WINDOWS-1251',
3972
            'WINDOWS1251' => 'WINDOWS-1251',
3973
            'CP1252'      => 'WINDOWS-1252',
3974
            'WIN1252'     => 'WINDOWS-1252',
3975
            'WINDOWS1252' => 'WINDOWS-1252',
3976
            'CP1253'      => 'WINDOWS-1253',
3977
            'WIN1253'     => 'WINDOWS-1253',
3978
            'WINDOWS1253' => 'WINDOWS-1253',
3979
            'CP1254'      => 'WINDOWS-1254',
3980
            'WIN1254'     => 'WINDOWS-1254',
3981
            'WINDOWS1254' => 'WINDOWS-1254',
3982
            'CP1255'      => 'WINDOWS-1255',
3983
            'WIN1255'     => 'WINDOWS-1255',
3984
            'WINDOWS1255' => 'WINDOWS-1255',
3985
            'CP1256'      => 'WINDOWS-1256',
3986
            'WIN1256'     => 'WINDOWS-1256',
3987
            'WINDOWS1256' => 'WINDOWS-1256',
3988
            'CP1257'      => 'WINDOWS-1257',
3989
            'WIN1257'     => 'WINDOWS-1257',
3990
            'WINDOWS1257' => 'WINDOWS-1257',
3991
            'CP1258'      => 'WINDOWS-1258',
3992
            'WIN1258'     => 'WINDOWS-1258',
3993
            'WINDOWS1258' => 'WINDOWS-1258',
3994
            'UTF16'       => 'UTF-16',
3995 5
            'UTF32'       => 'UTF-32',
3996 4
            'UTF8'        => 'UTF-8',
3997
            'UTF'         => 'UTF-8',
3998
            'UTF7'        => 'UTF-7',
3999 5
            '8BIT'        => 'CP850',
4000
            'BINARY'      => 'CP850',
4001 5
        ];
4002
4003
        if (!empty($equivalences[$encodingUpperHelper])) {
4004
            $encoding = $equivalences[$encodingUpperHelper];
4005
        }
4006
4007
        $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;
4008
4009
        return $encoding;
4010
    }
4011
4012
    /**
4013 5
     * Standardize line ending to unix-like.
4014
     *
4015
     * @param string $str
4016
     *
4017
     * @return string
4018
     */
4019
    public static function normalize_line_ending(string $str): string
    {
        // "\r\n" is listed before "\r" so that a Windows line break is replaced
        // as one unit instead of leaving a stray "\n" behind.
        $foreignLineBreaks = ["\r\n", "\r"];

        return (string) \str_replace($foreignLineBreaks, "\n", $str);
    }
4023
4024
    /**
4025 38
     * Normalize some MS Word special characters.
4026 2
     *
4027
     * @param string $str <p>The string to be normalized.</p>
4028
     *
4029 38
     * @return string
4030 38
     */
4031
    public static function normalize_msword(string $str): string
    {
        // Search / replace lists derived from the lookup table, kept in static
        // locals so that they are only built on the first non-empty call.
        static $UTF8_MSWORD_KEYS_CACHE = null;
        static $UTF8_MSWORD_VALUES_CACHE = null;

        if ($str === '') {
            return '';
        }

        if ($UTF8_MSWORD_KEYS_CACHE === null) {
            // The table itself is lazy-loaded via getData().
            if (self::$UTF8_MSWORD === null) {
                self::$UTF8_MSWORD = self::getData('utf8_msword');
            }

            $table = self::$UTF8_MSWORD;
            $UTF8_MSWORD_KEYS_CACHE = \array_keys($table);
            $UTF8_MSWORD_VALUES_CACHE = \array_values($table);
        }

        return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
    }
4051
4052
    /**
4053
     * Normalize the whitespace.
4054
     *
4055
     * @param string $str                     <p>The string to be normalized.</p>
4056 86
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
4057 9
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
4058
     *                                        bidirectional text chars.</p>
4059
     *
4060 86
     * @return string
4061 86
     */
4062
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        // One cached search list per "keep NBSP" variant (cache key 0 or 1).
        $cacheKey = (int) $keepNonBreakingSpace;
        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // Bidi control characters are removed entirely, not turned into spaces.
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        // Every remaining whitespace variant becomes a plain ASCII space.
        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4093
4094
    /**
4095
     * Calculates Unicode code point of the given UTF-8 encoded character.
4096
     *
4097
     * INFO: opposite to UTF8::chr()
4098
     *
4099
     * @param string $chr      <p>The character of which to calculate code point.</p>
4100
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
4101 30
     *
4102
     * @return int
4103 30
     *             Unicode code point of the given character,<br>
4104
     *             0 on invalid UTF-8 byte sequence
4105
     */
4106 30
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The cache key is the caller's input plus the (normalized) encoding name.
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // Hits in the lookup table are returned directly (without touching $CHAR_CACHE).
        if (isset(self::$ORD[$chr])) {
            return self::$ORD[$chr];
        }

        // For anything but UTF-8 the character is passed through self::encode() first;
        // the cache key above still refers to the untouched input.
        $workChr = $encoding !== 'UTF-8' ? self::encode($encoding, $chr) : $chr;

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($workChr);
            // A falsy result (null / 0) falls through to the manual decoding below.
            if ($intlCode) {
                return $CHAR_CACHE[$cacheKey] = $intlCode;
            }
        }

        // Manual decoding: look at up to four bytes (unpack() yields a 1-based array).
        $bytes = \unpack('C*', (string) self::substr($workChr, 0, 4, 'CP850'));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte (or an empty input, which yields 0)
            $codePoint = $lead;
        }

        return $CHAR_CACHE[$cacheKey] = (int) $codePoint;
    }
4171
4172
    /**
4173
     * Parses the string into an array (into the second parameter).
4174
     *
4175
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
4176
     *          if the second parameter is not set!
4177
     *
4178
     * @see http://php.net/manual/en/function.parse-str.php
4179
     *
4180
     * @param string $str       <p>The input string.</p>
4181 2
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
4182 2
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4183
     *
4184
     * @return bool
4185 2
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
4186
     */
4187
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            // Without mbstring the native parser is used; success is judged
            // solely by whether $result ended up non-empty.
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return !empty($result);
        }

        $parsed = \mb_parse_str($str, $result);

        // Both conditions must hold: the call did not fail AND something was parsed.
        return $parsed !== false && !empty($result);
    }
4208
4209
    /**
4210
     * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
4211 102
     *
4212
     * @return bool
4213
     *              <strong>true</strong> if support is available,<br>
4214
     *              <strong>false</strong> otherwise
4215
     */
4216
    public static function pcre_utf8_support(): bool
    {
        // preg_match() gives 1 on a match, 0 on no match and false on failure;
        // only the first case counts as "supported". The "@" silences any
        // warning raised by the probe itself.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probeResult = @\preg_match('//u', '');

        return (bool) $probeResult;
    }
4221
4222
    /**
4223
     * Create an array containing a range of UTF-8 characters.
4224 2
     *
4225 2
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
4226
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
4227
     *
4228 2
     * @return string[]
4229
     */
4230
    public static function range($var1, $var2): array
    {
        // Falsy boundaries (null, '', 0, '0', ...) always produce an empty range.
        if (!$var1 || !$var2) {
            return [];
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $start = self::rangeBoundaryToCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = self::rangeBoundaryToCodePoint($var2);
        if (!$end) {
            return [];
        }

        // \range() also counts downwards when $start > $end.
        return \array_map(
            static function (int $i): string {
                return (string) self::chr($i);
            },
            \range($start, $end)
        );
    }

    /**
     * Resolve one boundary of UTF8::range() to a code point.
     *
     * Resolution order: decimal digits, then hexadecimal digits, then the value is
     * treated as a character and handed to UTF8::ord().
     *
     * @param mixed $boundary <p>Numeric or hexadecimal code point, or a UTF-8 character.</p>
     *
     * @return int the code point, 0 if it could not be determined
     */
    private static function rangeBoundaryToCodePoint($boundary): int
    {
        // ctype_*() is only fed strings: non-string arguments are deprecated since PHP 8.1.
        $boundaryAsString = (string) $boundary;

        /** @noinspection PhpComposerExtensionStubsInspection */
        if (\ctype_digit($boundaryAsString)) {
            return (int) $boundary;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        if (\ctype_xdigit($boundaryAsString)) {
            return (int) self::hex_to_int($boundaryAsString);
        }

        return self::ord($boundary);
    }
4277
4278
    /**
4279
     * Multi decode html entity & fix urlencoded-win1252-chars.
4280
     *
4281
     * e.g:
4282
     * 'test+test'                     => 'test+test'
4283
     * 'D&#252;sseldorf'               => 'Düsseldorf'
4284
     * 'D%FCsseldorf'                  => 'Düsseldorf'
4285
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
4286
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
4287
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
4288
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
4289
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
4290
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
4291 3
     *
4292 2
     * @param string $str          <p>The input string.</p>
4293
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
4294
     *
4295 3
     * @return string
4296 3
     */
4297 2
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Non-standard "%uXXXX" escapes are rewritten as numeric HTML entities
        // ("&#xXXXX;") so that the entity decoding below can resolve them.
        $pattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always decode at least once; with $multi_decode the pass is repeated until the
        // string no longer changes. (Previously nothing was decoded at all when
        // $multi_decode was false.)
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
4330
4331
    /**
4332
     * Replaces all occurrences of $pattern in $str by $replacement.
4333
     *
4334
     * @param string $str         <p>The input string.</p>
4335
     * @param string $pattern     <p>The regular expression pattern.</p>
4336 259
     * @param string $replacement <p>The string to replace with.</p>
4337 9
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
4338
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
4339
     *
4340
     * @return string
4341 259
     */
4342
    public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
    {
        // The option string "msr" is reduced to "ms"; anything else is used verbatim.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // A falsy delimiter ('' or '0') falls back to "/".
        $wrap = $delimiter ?: '/';

        // The "u" modifier is always applied; $pattern is inserted as given.
        $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4359
4360
    /**
4361
     * alias for "UTF8::remove_bom()"
4362
     *
4363
     * @see        UTF8::remove_bom()
4364
     *
4365
     * @param string $str
4366
     *
4367
     * @return string
4368
     *
4369
     * @deprecated <p>use "UTF8::remove_bom()"</p>
4370
     */
4371
    public static function removeBOM(string $str): string
    {
        // Deprecated alias: the work is delegated unchanged to remove_bom().
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4375
4376
    /**
4377 79
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
4378 7
     *
4379
     * @param string $str <p>The input string.</p>
4380
     *
4381 79
     * @return string string without UTF-BOM
4382 79
     */
4383 79
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = self::strlen_in_byte($str);

        // The loop is not left after the first hit, so a string that starts with
        // another BOM from the table afterwards gets that one stripped as well.
        foreach (self::$BOM as $bomString => $bomByteLength) {
            // Only a BOM at byte offset 0 counts.
            if (self::strpos_in_byte($str, $bomString, 0) !== 0) {
                continue;
            }

            $stripped = self::substr_in_byte($str, $bomByteLength, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= (int) $bomByteLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4405
4406
    /**
4407
     * Removes duplicate occurrences of a string in another string.
4408 2
     *
4409 2
     * @param string          $str  <p>The base string.</p>
4410
     * @param string|string[] $what <p>String to search for in the base string.</p>
4411
     *
4412 2
     * @return string the result string with removed duplicates
4413
     */
4414 2
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        // A single needle is handled like a one-element list.
        if (\is_string($what) === true) {
            $what = [$what];
        }

        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                // Collapse every run of consecutive $item occurrences into a single one.
                // A callback is used because preg_replace() would interpret "$0", "\1", ...
                // inside $item as backreferences instead of literal text.
                $str = (string) \preg_replace_callback(
                    '/(' . \preg_quote($item, '/') . ')+/',
                    static function () use ($item): string {
                        return $item;
                    },
                    $str
                );
            }
        }

        return $str;
    }
4429
4430
    /**
4431
     * Remove html via "strip_tags()" from the string.
4432
     *
4433
     * @param string $str
4434 6
     * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which should
4435
     *                              not be stripped. Default: null
4436
     *                              </p>
4437
     *
4438
     * @return string
4439
     */
4440
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        // Thin wrapper around the native strip_tags(); $allowableTags is passed through as-is.
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4444
4445
    /**
4446
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
4447 6
     *
4448
     * @param string $str
4449
     * @param string $replacement [optional] <p>Default is an empty string.</p>
4450
     *
4451
     * @return string
4452
     */
4453
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // Alternatives, tried in this order: CRLF (as ONE break), lone CR, lone LF,
        // and anything from "<br" up to the next ">" (modifiers: i = case-insensitive,
        // s = dot matches newlines, U = ungreedy).
        // The first alternative used to read "/\r\n": it swallowed a literal "/" in front
        // of a CRLF and made a plain CRLF count as two breaks (CR, then LF).
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4457
4458
    /**
4459
     * Remove invisible characters from a string.
4460
     *
4461
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
4462
     *
4463
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4464
     *
4465
     * @param string $str
4466 113
     * @param bool   $url_encoded
4467
     * @param string $replacement
4468
     *
4469
     * @return string
4470 113
     */
4471 113
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        if ($url_encoded) {
            // The "i" modifier matters: percent-encoding is case-insensitive,
            // so "%0B" / "%1F" must be caught just like "%0b" / "%1f".
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // Repeat until a pass changes nothing: removing one sequence can put the
        // pieces of another one next to each other (e.g. "%%000b").
        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4491
4492
    /**
4493
     * Returns a new string with the prefix $substring removed, if present.
4494
     *
4495 12
     * @param string $str
4496 6
     * @param string $substring <p>The prefix to remove.</p>
4497 6
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
4498 6
     *
4499 6
     * @return string string without the prefix $substring
4500 6
     */
4501
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Nothing to do when $str does not begin with the prefix.
        if (!self::str_starts_with($str, $substring)) {
            return $str;
        }

        // Skip as many characters as the prefix is long (counted in $encoding).
        $prefixLength = (int) self::strlen($substring, $encoding);

        return (string) self::substr($str, $prefixLength, null, $encoding);
    }
4514
4515
    /**
4516
     * Returns a new string with the suffix $substring removed, if present.
4517
     *
4518 12
     * @param string $str
4519 6
     * @param string $substring <p>The suffix to remove.</p>
4520 6
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
4521 6
     *
4522 6
     * @return string string having a $str without the suffix $substring
4523
     */
4524
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Nothing to do when $str does not end with the suffix.
        if (!self::str_ends_with($str, $substring)) {
            return $str;
        }

        // Number of leading characters to keep, counted in $encoding.
        $keepLength = (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding);

        // $encoding is handed to substr() as well, so that the cut position is interpreted
        // in the same encoding the lengths were measured in (as remove_left() does).
        return (string) self::substr(
            $str,
            0,
            $keepLength,
            $encoding
        );
    }
4536
4537
    /**
4538
     * Replaces all occurrences of $search in $str by $replacement.
4539
     *
4540
     * @param string $str           <p>The input string.</p>
4541 29
     * @param string $search        <p>The needle to search for.</p>
4542 22
     * @param string $replacement   <p>The string to replace with.</p>
4543
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4544
     *
4545 7
     * @return string string after the replacements
4546
     */
4547
    public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
    {
        // Pure dispatch: the flag only selects the case-sensitive or case-insensitive helper.
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4555
4556
    /**
4557
     * Replaces all occurrences of $search in $str by $replacement.
4558
     *
4559
     * @param string       $str           <p>The input string.</p>
4560 30
     * @param array        $search        <p>The elements to search for.</p>
4561 23
     * @param array|string $replacement   <p>The string to replace with.</p>
4562
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4563
     *
4564 7
     * @return string string after the replacements
4565
     */
4566
    public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
    {
        // Same dispatch as replace(), but with a list of needles.
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4574
4575
    /**
4576
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4577
     *
4578 62
     * @param string $str                <p>The input string</p>
4579 9
     * @param string $replacementChar    <p>The replacement character.</p>
4580
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
4581
     *
4582 62
     * @return string
4583 62
     */
4584 62
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // mb_substitute_character() only accepts a code point (int) or one of the
            // keywords "none" / "long" / "entity" - never an arbitrary string such as "?".
            // Invalid bytes are therefore first turned into U+FFFD, which the final
            // str_replace() below maps to $replacementChar (whatever its length).

            // always fallback via symfony polyfill
            $save = \mb_substitute_character();
            \mb_substitute_character(0xFFFD);
            try {
                $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // the substitute character is global state: restore it even on failure
                \mb_substitute_character($save);
            }

            // the polyfill maybe return false
            /** @psalm-suppress RedundantCondition */
            $str = \is_string($strTmp) ? $strTmp : '';
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacementChar,
                $replacementChar,
            ],
            $str
        );
    }
4629
4630
    /**
4631
     * Strip whitespace or other characters from end of a UTF-8 string.
4632 22
     *
4633 3
     * @param string $str   <p>The string to be trimmed.</p>
4634
     * @param mixed  $chars <p>Optional characters to be stripped.</p>
4635
     *
4636
     * @return string the string with unwanted characters stripped from the right
4637 21
     */
4638 16
    public static function rtrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        //
        // "No characters given" means: the INF default or an empty value (null, '', false, []).
        // A zero ('0', 0) is a real character to strip and must not be mistaken for "empty",
        // otherwise e.g. rtrim('100', '0') could never remove the zeros.
        if ($chars === \INF || (!$chars && !\is_numeric($chars))) {
            // default: strip trailing separators (\pZ) and control / other chars (\pC)
            $pattern = '[\pZ\pC]+$';
        } else {
            // quoted so that the characters are safe inside a character class
            $pattern = '[' . \preg_quote((string) $chars, '/') . ']+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4654
4655
    /**
4656 2
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
4657 2
     */
4658 2
    public static function showSupport()
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Prints one "<key> - <value>" line per entry of the support table.
        // The entries are only read, so they are iterated by value (no reference needed).
        echo '<pre>';
        foreach (self::$SUPPORT as $key => $value) {
            echo $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        echo '</pre>';
    }
4671
4672
    /**
4673
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4674
     *
4675 2
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
4676 2
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
4677
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
4678
     *
4679
     * @return string the HTML numbered entity
4680 2
     */
4681
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // ASCII input is handed back untouched when the caller asked for that
        // (is_ascii() is only evaluated if the flag is set).
        $returnAsIs = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($returnAsIs) {
            return $char;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Numeric (decimal) entity built from the code point.
        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4701
4702 5
    /**
4703
     * @param string $str
4704
     * @param int    $tabLength
4705
     *
4706
     * @return string
4707
     */
4708
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // Every run of exactly $tabLength spaces becomes one tab (anywhere in the string).
        $spaceRun = \str_repeat(' ', $tabLength);

        return \str_replace($spaceRun, "\t", $str);
    }
4712
4713
    /**
4714
     * Convert a string to an array of Unicode characters.
4715
     *
4716
     * @param int|int[]|string|string[] $str       <p>The string to split into array.</p>
4717 87
     * @param int                       $length    [optional] <p>Max character length of each array element.</p>
4718 3
     * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4719
     *
4720
     * @return array
4721 86
     *               <p>An array containing chunks of the input.</p>
4722 2
     */
4723 2
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as &$v) {
                // forward $cleanUtf8 so that nested values are treated like top-level strings
                $v = self::split($v, $length, $cleanUtf8);
            }
            // drop the reference so later writes cannot leak into the last element
            unset($v);

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        // init
        $ret = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['pcre_utf8'] === true) {
            // "/u" matches whole code points, "/s" lets "." also match newlines
            \preg_match_all('/./us', $str, $retArray);
            if (isset($retArray[0])) {
                $ret = $retArray[0];
            }
            unset($retArray);
        } else {
            // fallback: walk the bytes and group them by the UTF-8 lead-byte pattern;
            // bytes that do not form a complete sequence are skipped without output

            $len = self::strlen_in_byte($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two-byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // regroup the single characters into chunks of $length characters
            $ret = \array_chunk($ret, $length);

            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
4836
4837
    /**
4838
     * Returns a camelCase version of the string. Trims surrounding spaces,
4839
     * capitalizes letters following digits, spaces, dashes and underscores,
4840
     * and removes spaces, dashes, as well as underscores.
4841
     *
4842
     * @param string      $str                   <p>The input string.</p>
4843
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
4844 32
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
4845 32
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
4846
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
4847 32
     *
4848 32
     * @return string
4849
     */
4850
    public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // Shared upper-casing step, bound to the caller's encoding / language options.
        $toUpper = static function (string $fragment) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength): string {
            return self::strtoupper($fragment, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        };

        // Trim, lower-case the first character, then drop leading dashes / underscores.
        $camel = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        $camel = (string) \preg_replace('/^[-_]+/', '', $camel);

        // Remove each separator run and upper-case the character that follows it (if any).
        $camel = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            static function (array $hit) use ($toUpper): string {
                return isset($hit[1]) ? $toUpper($hit[1]) : '';
            },
            $camel
        );

        // Upper-case every digit run together with the character directly after it.
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            static function (array $hit) use ($toUpper): string {
                return $toUpper($hit[0]);
            },
            $camel
        );
    }
4885
4886
    /**
4887
     * Returns the string with the first letter of each word capitalized,
4888 1
     * except for when the word is a name which shouldn't be capitalized.
4889
     *
4890 1
     * @param string $str
4891
     *
4892 1
     * @return string string with $str capitalized
4893
     */
4894
    public static function str_capitalize_name(string $str): string
    {
        $name = self::collapse_whitespace($str);

        // Run the helper once per delimiter: first for spaces, then for dashes.
        foreach ([' ', '-'] as $delimiter) {
            $name = self::str_capitalize_name_helper($name, $delimiter);
        }

        return $name;
    }
4902
4903
    /**
4904
     * Returns true if the string contains $needle, false otherwise. By default
4905
     * the comparison is case-sensitive, but can be made insensitive by setting
4906
     * $caseSensitive to false.
4907
     *
4908
     * @param string $haystack      <p>The input string.</p>
4909 106
     * @param string $needle        <p>Substring to look for.</p>
4910 1
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4911
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
4912
     *
4913
     * @return bool whether or not $haystack contains $needle
4914
     */
4915 105
    public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        // An empty haystack or an empty needle never counts as a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        // (a non-boolean third argument is taken as the encoding)
        /** @psalm-suppress RedundantConditionGivenDocblockType */
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        $position = $caseSensitive
            ? self::strpos($haystack, $needle, 0, $encoding)
            : self::stripos($haystack, $needle, 0, $encoding);

        return $position !== false;
    }
4933
4934
    /**
4935
     * Returns true if the string contains all $needles, false otherwise. By
4936
     * default the comparison is case-sensitive, but can be made insensitive by
4937
     * setting $caseSensitive to false.
4938
     *
4939
     * @param string $haystack      <p>The input string.</p>
4940 44
     * @param array  $needles       <p>SubStrings to look for.</p>
4941
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4942
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
4943
     *
4944 44
     * @return bool whether or not $haystack contains $needle
4945 1
     */
4946
    public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        // Nothing can be found in an empty haystack.
        if ($haystack === '') {
            return false;
        }

        // An empty needle list is reported as "not contained".
        if (empty($needles)) {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        // (a non-boolean third argument is taken as the encoding)
        /** @psalm-suppress RedundantConditionGivenDocblockType */
        if ($caseSensitive !== false && $caseSensitive !== true) {
            $encoding = (string) $caseSensitive;
        }

        // Iterate by value: the needles are only read, so a reference is not needed
        // and would stay bound to the last element after the loop.
        foreach ($needles as $needle) {
            if (!self::str_contains($haystack, $needle, $caseSensitive, $encoding)) {
                // the first missing needle settles the result
                return false;
            }
        }

        return true;
    }
4970
4971
    /**
4972
     * Returns true if the string contains any $needles, false otherwise. By
4973
     * default the comparison is case-sensitive, but can be made insensitive by
4974
     * setting $caseSensitive to false.
4975
     *
4976
     * @param string $haystack      <p>The input string.</p>
4977
     * @param array  $needles       <p>SubStrings to look for.</p>
4978 43
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4979 1
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
4980
     *
4981
     * @return bool
4982 42
     *              Whether or not $str contains $needle
4983 42
     */
4984 42
    public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        // An empty needle list can never produce a match.
        if (empty($needles)) {
            return false;
        }

        // Iterate by value: the needles are only read, so a reference is not needed
        // and would stay bound to the last element after the loop.
        foreach ($needles as $needle) {
            if (self::str_contains($haystack, $needle, $caseSensitive, $encoding)) {
                // the first hit settles the result
                return true;
            }
        }

        return false;
    }
4998
4999
    /**
5000
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
5001
     * inserted before uppercase characters (with the exception of the first
5002
     * character of the string), and in place of spaces as well as underscores.
5003 19
     *
5004
     * @param string $str      <p>The input string.</p>
5005
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5006
     *
5007
     * @return string
5008
     */
5009
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // Dasherizing is delimiting with "-" as the separator.
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5013
5014
    /**
5015
     * Returns a lowercase and trimmed string separated by the given delimiter.
5016
     * Delimiters are inserted before uppercase characters (with the exception
5017
     * of the first character of the string), and in place of spaces, dashes,
5018
     * and underscores. Alpha delimiters are not converted to lowercase.
5019
     *
5020
     * @param string      $str                   <p>The input string.</p>
5021
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
5022
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
5023
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
5024
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
5025
     *                                           tr</p>
5026
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
5027
     *                                           ß</p>
5028
     *
5029
     * @return string
5030
     */
5031 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = self::trim($str);

        // Mark every uppercase letter that is not at a word boundary with a dash.
        // \p{Lu} covers all Unicode uppercase letters, not only ASCII A-Z.
        $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $str);

        // Lower-case before the delimiter is inserted, so the delimiter itself keeps its case.
        $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // Collapse each run of dashes, underscores and whitespace into one delimiter.
        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5047
5048
    /**
5049
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
5050
     *
5051
     * @param string $str <p>The input string.</p>
5052 30
     *
5053
     * @return false|string
5054
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
5055
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
5056
     */
5057
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // Step 1: binary-looking input is either UTF-16 / UTF-32 or not detectable at all
        //

        if (self::is_binary($str, true) === true) {
            $utf16Type = self::is_utf16($str, false);
            if ($utf16Type === 1 || $utf16Type === 2) {
                return $utf16Type === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            $utf32Type = self::is_utf32($str, false);
            if ($utf32Type === 1 || $utf32Type === 2) {
                return $utf32Type === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // Step 2: plain ASCII
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // Step 3: UTF-8
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // Step 4: ask "mb_detect_encoding()" (strict mode) with a fixed candidate order
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // Step 5: last resort, the first encoding that "iconv()" round-trips unchanged wins
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5173
5174
    /**
5175
     * Check if the string ends with the given substring.
5176 40
     *
5177 4
     * @param string $haystack <p>The string to search in.</p>
5178
     * @param string $needle   <p>The substring to search for.</p>
5179
     *
5180 38
     * @return bool
5181
     */
5182
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // Empty input on either side is never a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Compare the trailing strlen($needle) bytes of the haystack with the needle.
        $tail = \substr($haystack, -\strlen($needle));

        return $tail === $needle;
    }
5190
5191
    /**
5192
     * Returns true if the string ends with any of $substrings, false otherwise.
5193
     *
5194
     * - case-sensitive
5195 7
     *
5196
     * @param string   $str        <p>The input string.</p>
5197
     * @param string[] $substrings <p>Substrings to look for.</p>
5198
     *
5199 7
     * @return bool whether or not $str ends with $substring
5200 7
     */
5201 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // Without candidates there is nothing to match.
        if (empty($substrings)) {
            return false;
        }

        // Iterate by value: the candidates are only read, so a reference is not needed
        // and would stay bound to the last element after the loop.
        foreach ($substrings as $substring) {
            if (self::str_ends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5215
5216
    /**
5217
     * Ensures that the string begins with $substring. If it doesn't, it's
5218
     * prepended.
5219 10
     *
5220 4
     * @param string $str       <p>The input string.</p>
5221
     * @param string $substring <p>The substring to add if not present.</p>
5222
     *
5223 10
     * @return string
5224
     */
5225
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Prepend only when the prefix is not reported as present.
        return self::str_starts_with($str, $substring)
            ? $str
            : $substring . $str;
    }
5233
5234
    /**
5235
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
5236 10
     *
5237 4
     * @param string $str       <p>The input string.</p>
5238
     * @param string $substring <p>The substring to add if not present.</p>
5239
     *
5240 10
     * @return string
5241
     */
5242
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Append only when the suffix is not reported as present.
        return self::str_ends_with($str, $substring)
            ? $str
            : $str . $substring;
    }
5250
5251
    /**
5252
     * Capitalizes the first word of the string, replaces underscores with
5253 3
     * spaces, and strips '_id'.
5254
     *
5255 3
     * @param string $str
5256
     *
5257
     * @return string
5258
     */
5259 3
    public static function str_humanize($str): string
    {
        // "_id" is removed first, then every remaining underscore becomes a space.
        $search = ['_id', '_'];
        $replacements = ['', ' '];

        $humanized = self::str_replace($search, $replacements, $str);

        return self::ucfirst(self::trim($humanized));
    }
5275
5276
    /**
5277
     * Check if the string ends with the given substring, case insensitive.
5278 12
     *
5279 2
     * @param string $haystack <p>The string to search in.</p>
5280
     * @param string $needle   <p>The substring to search for.</p>
5281
     *
5282 12
     * @return bool
5283
     */
5284
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // Empty input on either side is never a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Take the trailing strlen($needle) bytes and compare them case-insensitively.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5292
5293
    /**
5294
     * Returns true if the string ends with any of $substrings, false otherwise.
5295
     *
5296
     * - case-insensitive
5297 4
     *
5298
     * @param string   $str        <p>The input string.</p>
5299
     * @param string[] $substrings <p>Substrings to look for.</p>
5300
     *
5301 4
     * @return bool whether or not $str ends with $substring
5302 4
     */
5303 4
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Without candidates there is nothing to match.
        if (empty($substrings)) {
            return false;
        }

        // Iterate by value: the candidates are only read, so a reference is not needed
        // and would stay bound to the last element after the loop.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5317
5318
    /**
5319
     * Returns the index of the first occurrence of $needle in the string,
5320
     * and false if not found. Accepts an optional offset from which to begin
5321
     * the search.
5322
     *
5323
     * @param string $str      <p>The input string.</p>
5324
     * @param string $needle   <p>Substring to look for.</p>
5325 2
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5326 2
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5327 2
     *
5328 2
     * @return false|int
5329 2
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5330
     */
5331
    public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        // Alias: hands all arguments to stripos() unchanged.
        $position = self::stripos($str, $needle, $offset, $encoding);

        return $position;
    }
5340
5341
    /**
5342
     * Returns the index of the last occurrence of $needle in the string,
5343
     * and false if not found. Accepts an optional offset from which to begin
5344
     * the search. Offsets may be negative to count from the last character
5345
     * in the string.
5346
     *
5347
     * @param string $str      <p>The input string.</p>
5348
     * @param string $needle   <p>Substring to look for.</p>
5349 2
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5350 2
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5351 2
     *
5352 2
     * @return false|int
5353 2
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5354
     */
5355
    public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        // Alias: hands all arguments to strripos() unchanged.
        $position = self::strripos($str, $needle, $offset, $encoding);

        return $position;
    }
5364
5365
    /**
5366
     * Returns the index of the first occurrence of $needle in the string,
5367
     * and false if not found. Accepts an optional offset from which to begin
5368
     * the search.
5369
     *
5370
     * @param string $str      <p>The input string.</p>
5371
     * @param string $needle   <p>Substring to look for.</p>
5372 12
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5373 12
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5374 12
     *
5375 12
     * @return false|int
5376 12
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5377
     */
5378
    public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        // Alias: hands all arguments to strpos() unchanged.
        $position = self::strpos($str, $needle, $offset, $encoding);

        return $position;
    }
5387
5388
    /**
5389
     * Returns the index of the last occurrence of $needle in the string,
5390
     * and false if not found. Accepts an optional offset from which to begin
5391
     * the search. Offsets may be negative to count from the last character
5392
     * in the string.
5393
     *
5394
     * @param string $str      <p>The input string.</p>
5395
     * @param string $needle   <p>Substring to look for.</p>
5396 12
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5397 12
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5398 12
     *
5399 12
     * @return false|int
5400 12
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5401
     */
5402
    public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        // Alias: hands all arguments to strrpos() unchanged.
        $position = self::strrpos($str, $needle, $offset, $encoding);

        return $position;
    }
5411
5412
    /**
5413
     * Inserts $substring into the string at the $index provided.
5414
     *
5415
     * @param string $str       <p>The input string.</p>
5416 8
     * @param string $substring <p>String to be inserted.</p>
5417
     * @param int    $index     <p>The index at which to insert the substring.</p>
5418 8
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
5419 1
     *
5420
     * @return string
5421
     */
5422 7
    public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
    {
        $length = (int) self::strlen($str, $encoding);

        // An index past the end leaves the string untouched.
        if ($index > $length) {
            return $str;
        }

        // Cut the string at $index and put the new part between the two pieces.
        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5434
5435
    /**
5436
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
5437
     *
5438
     * @see  http://php.net/manual/en/function.str-ireplace.php
5439
     *
5440
     * @param mixed $search  <p>
5441
     *                       Every replacement with search array is
5442
     *                       performed on the result of previous replacement.
5443
     *                       </p>
5444
     * @param mixed $replace <p>
5445
     *                       </p>
5446
     * @param mixed $subject <p>
5447
     *                       If subject is an array, then the search and
5448
     *                       replace is performed with every entry of
5449
     *                       subject, and the return value is an array as
5450
     *                       well.
5451
     *                       </p>
5452
     * @param int   $count   [optional] <p>
5453
     *                       The number of matched and replaced needles will
5454 29
     *                       be returned in count which is passed by
5455
     *                       reference.
5456
     *                       </p>
5457 29
     *
5458 29
     * @return mixed a string or an array of replacements
5459 29
     */
5460 6
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Turn every search term into a case-insensitive, UTF-8 aware pattern.
        $patterns = [];
        foreach ((array) $search as $key => $term) {
            $term = (string) $term;
            // An empty term must not match anything: "/^(?<=.)$/" can never succeed.
            $patterns[$key] = $term === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($term, '/') . '/ui';
        }

        // preg_replace() treats "$n" / "\n" in the replacement as backreferences,
        // str_ireplace() does not; escape both characters so replacements stay literal.
        $escape = static function ($replacement): string {
            return \addcslashes((string) $replacement, '\\$');
        };
        $replace = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        // $count receives the total number of replacements via the reference parameter.
        $subject = \preg_replace($patterns, $replace, $subject, -1, $count);

        return $subject;
    }
5479
5480
    /**
5481
     * Replaces $search from the beginning of string with $replacement.
5482
     *
5483 17
     * @param string $str         <p>The input string.</p>
5484 4
     * @param string $search      <p>The string to search for.</p>
5485 2
     * @param string $replacement <p>The replacement.</p>
5486
     *
5487
     * @return string string after the replacements
5488 2
     */
5489 2
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // With nothing to search for, the replacement is appended to the input
        // (for an empty input this yields the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        // Case-insensitive prefix check; an empty input can never start with $search.
        $hasPrefix = $str !== '' && \stripos($str, $search) === 0;
        if ($hasPrefix) {
            return $replacement . \substr($str, \strlen($search));
        }

        return $str;
    }
5511
5512
    /**
5513
     * Replaces $search from the ending of string with $replacement.
5514
     *
5515 17
     * @param string $str         <p>The input string.</p>
5516 4
     * @param string $search      <p>The string to search for.</p>
5517 2
     * @param string $replacement <p>The replacement.</p>
5518
     *
5519
     * @return string string after the replacements
5520 2
     */
5521 2
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // With nothing to search for, the replacement is appended to the input
        // (for an empty input this yields the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte position at which $search has to start in order to be the suffix.
        $offset = \strlen($str) - \strlen($search);

        // A negative offset means $search is longer than $str and cannot match.
        // It must not reach stripos(): an offset outside the string is rejected there.
        if ($offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $offset) !== false) {
            // keep everything before the suffix and attach the replacement
            $str = \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5543
5544
    /**
5545
     * Check if the string starts with the given substring, case insensitive.
5546 12
     *
5547 2
     * @param string $haystack <p>The string to search in.</p>
5548
     * @param string $needle   <p>The substring to search for.</p>
5549
     *
5550 12
     * @return bool
5551
     */
5552
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // Empty input on either side is never a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // The needle is a prefix exactly when its first case-insensitive hit is at index 0.
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
5560
5561
    /**
5562
     * Returns true if the string begins with any of $substrings, false otherwise.
5563
     *
5564
     * - case-insensitive
5565 4
     *
5566
     * @param string $str        <p>The input string.</p>
5567
     * @param array  $substrings <p>Substrings to look for.</p>
5568
     *
5569 4
     * @return bool whether or not $str starts with $substring
5570
     */
5571
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // An empty string starts with nothing.
        if ($str === '') {
            return false;
        }

        // Without candidates there is nothing to match.
        if (empty($substrings)) {
            return false;
        }

        // Iterate by value: the candidates are only read, so a reference is not needed
        // and would stay bound to the last element after the loop.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5589
5590
    /**
5591
     * Gets the substring after the first occurrence of a separator.
5592
     *
5593
     * @param string $str       <p>The input string.</p>
5594 1
     * @param string $separator <p>The string separator.</p>
5595
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
5596 1
     *
5597
     * @return string
5598 1
     */
5599
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing can follow a separator when either input is empty.
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): $encoding is not forwarded to the index lookup — confirm that is intended.
        $separatorPos = self::str_iindex_first($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // Skip past the separator itself and keep everything up to the end of the string.
        $startAfterSeparator = $separatorPos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $startAfterSeparator, null, $encoding);
    }
5621
5622
    /**
5623
     * Gets the substring after the last occurrence of a separator.
5624
     *
5625
     * @param string $str       <p>The input string.</p>
5626 1
     * @param string $separator <p>The string separator.</p>
5627
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
5628 1
     *
5629
     * @return string
5630 1
     */
5631
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing can follow a separator when either input is empty.
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): $encoding is not forwarded to the index lookup — confirm that is intended.
        $separatorPos = self::str_iindex_last($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // Skip past the separator itself and keep everything up to the end of the string.
        $startAfterSeparator = $separatorPos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $startAfterSeparator, null, $encoding);
    }
5653
5654
    /**
5655
     * Gets the substring before the first occurrence of a separator.
5656
     *
5657
     * @param string $str       <p>The input string.</p>
5658 1
     * @param string $separator <p>The string separator.</p>
5659
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
5660 1
     *
5661
     * @return string
5662 1
     */
5663
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing can precede a separator when either input is empty.
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): $encoding is not forwarded to the index lookup — confirm that is intended.
        $separatorPos = self::str_iindex_first($str, $separator);

        // The separator position doubles as the length of the leading part.
        return $separatorPos === false
            ? ''
            : (string) self::substr($str, 0, $separatorPos, $encoding);
    }
5680
5681
    /**
5682
     * Gets the substring before the last occurrence of a separator.
5683
     *
5684
     * @param string $str       <p>The input string.</p>
5685 1
     * @param string $separator <p>The string separator.</p>
5686
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
5687 1
     *
5688
     * @return string
5689 1
     */
5690
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing can precede a separator when either input is empty.
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): $encoding is not forwarded to the index lookup — confirm that is intended.
        $separatorPos = self::str_iindex_last($str, $separator);

        // The separator position doubles as the length of the leading part.
        return $separatorPos === false
            ? ''
            : (string) self::substr($str, 0, $separatorPos, $encoding);
    }
5707
5708
    /**
5709
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
5710
     *
5711
     * @param string $str          <p>The input string.</p>
5712
     * @param string $needle       <p>The string to look for.</p>
5713 2
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
5714
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
5715 2
     *
5716
     * @return string
5717 2
     */
5718
    public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        // A failed lookup (false) is reported to the caller as an empty string.
        return $found === false ? '' : $found;
    }
5740
5741
    /**
5742
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
5743
     *
5744
     * @param string $str          <p>The input string.</p>
5745
     * @param string $needle       <p>The string to look for.</p>
5746 1
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
5747
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
5748 1
     *
5749
     * @return string
5750 1
     */
5751
    public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        // A failed lookup (false) is reported to the caller as an empty string.
        return $found === false ? '' : $found;
    }
5768
5769
    /**
5770
     * Returns the last $n characters of the string.
5771
     *
5772 12
     * @param string $str      <p>The input string.</p>
5773 4
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
5774
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5775
     *
5776 8
     * @return string
5777
     */
5778
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // A non-positive count yields an empty string; otherwise take $n
        // characters counted from the end via a negative start offset.
        return $n > 0
            ? (string) self::substr($str, -$n, null, $encoding)
            : '';
    }
5786
5787
    /**
5788
     * Limit the number of characters in a string.
5789
     *
5790
     * @param string $str      <p>The input string.</p>
5791 2
     * @param int    $length   [optional] <p>Default: 100</p>
5792 2
     * @param string $strAddOn [optional] <p>Default: …</p>
5793
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5794
     *
5795 2
     * @return string
5796 2
     */
5797
    public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        // Short enough already: return the input untouched, without the add-on.
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the input so that the
        // character budget is computed consistently.
        $keep = $length - (int) self::strlen($strAddOn, $encoding);

        // The add-on alone uses up the whole budget. Passing a negative length
        // on to substr() would presumably be read as "stop N characters before
        // the end" and produce a result longer than requested, so return only
        // the add-on instead.
        if ($keep <= 0) {
            return $strAddOn;
        }

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
5813
5814
    /**
5815
     * Limit the number of characters in a string, but also after the next word.
5816
     *
5817
     * @param string $str      <p>The input string.</p>
5818 6
     * @param int    $length   [optional] <p>Default: 100</p>
5819 2
     * @param string $strAddOn [optional] <p>Default: …</p>
5820
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5821
     *
5822 6
     * @return string
5823 2
     */
5824
    public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        // Short enough already: return the input untouched, without the add-on.
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        $cutBeforeLast = $length - 1;

        // The limit lands exactly on a space: drop that space and append the add-on.
        if (self::substr($str, $cutBeforeLast, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $cutBeforeLast, $encoding)) . $strAddOn;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return $strAddOn;
        }

        // Discard the (possibly partial) last word of the truncated text.
        $words = \explode(' ', $truncated);
        \array_pop($words);
        $withoutLastWord = \implode(' ', $words);

        if ($withoutLastWord !== '') {
            return $withoutLastWord . $strAddOn;
        }

        // No space inside the limit at all: fall back to a hard cut.
        return ((string) self::substr($truncated, 0, $cutBeforeLast, $encoding)) . $strAddOn;
    }
5857
5858
    /**
5859
     * Returns the longest common prefix between the string and $otherStr.
5860
     *
5861 10
     * @param string $str      <p>The input string.</p>
5862
     * @param string $otherStr <p>Second string for comparison.</p>
5863 10
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5864 10
     *
5865 8
     * @return string
5866
     */
5867
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // Cast both lengths to int so the loop bound is always an integer,
        // even if a length lookup fails.
        $maxLength = \min(
            (int) self::strlen($str, $encoding),
            (int) self::strlen($otherStr, $encoding)
        );

        $longestCommonPrefix = '';
        for ($i = 0; $i < $maxLength; ++$i) {
            $char = self::substr($str, $i, 1, $encoding);

            // Stop at the first position where the characters differ
            // (or where a character could not be extracted).
            if ($char === false || $char !== self::substr($otherStr, $i, 1, $encoding)) {
                break;
            }

            $longestCommonPrefix .= $char;
        }

        return $longestCommonPrefix;
    }
5888
5889
    /**
5890
     * Returns the longest common substring between the string and $otherStr.
5891
     * In the case of ties, it returns that which occurs first.
5892
     *
5893
     * @param string $str
5894
     * @param string $otherStr <p>Second string for comparison.</p>
5895 11
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5896 11
     *
5897
     * @return string the longest common substring of $str and $otherStr
5898
     */
5899 11
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem
        $strLength = (int) self::strlen($str, $encoding);
        $otherLength = (int) self::strlen($otherStr, $encoding);

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Extract the characters of $otherStr once up front instead of
        // re-extracting each of them for every character of $str.
        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[$j] = self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based end position of that match inside $str
        $table = \array_fill(
            0,
            $strLength + 1,
            \array_fill(0, $otherLength + 1, 0)
        );

        for ($i = 1; $i <= $strLength; ++$i) {
            // The character of $str does not depend on $j, so fetch it once per row.
            $strChar = self::substr($str, $i - 1, 1, $encoding);

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    // Extend the common run that ended at the previous pair of characters.
                    $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

                    // Strictly greater: on ties the earliest match is kept.
                    if ($table[$i][$j] > $len) {
                        $len = $table[$i][$j];
                        $end = $i;
                    }
                } else {
                    $table[$i][$j] = 0;
                }
            }
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
5938
5939
    /**
5940
     * Returns the longest common suffix between the string and $otherStr.
5941
     *
5942 10
     * @param string $str
5943
     * @param string $otherStr <p>Second string for comparison.</p>
5944 10
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5945 10
     *
5946 8
     * @return string
5947
     */
5948
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // Cast both lengths to int so the loop bound is always an integer,
        // even if a length lookup fails.
        $maxLength = \min(
            (int) self::strlen($str, $encoding),
            (int) self::strlen($otherStr, $encoding)
        );

        $longestCommonSuffix = '';
        for ($i = 1; $i <= $maxLength; ++$i) {
            // Negative offsets walk both strings backwards from their ends.
            $char = self::substr($str, -$i, 1, $encoding);

            // Stop at the first position where the characters differ
            // (or where a character could not be extracted).
            if ($char === false || $char !== self::substr($otherStr, -$i, 1, $encoding)) {
                break;
            }

            // Prepend, because the suffix is discovered from right to left.
            $longestCommonSuffix = $char . $longestCommonSuffix;
        }

        return $longestCommonSuffix;
    }
5969
5970
    /**
5971
     * Returns true if $str matches the supplied pattern, false otherwise.
5972 126
     *
5973
     * @param string $str     <p>The input string.</p>
5974
     * @param string $pattern <p>Regex pattern to match against.</p>
5975
     *
5976
     * @return bool whether or not $str matches the pattern
5977
     */
5978
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The pattern is wrapped in "/" delimiters and evaluated in UTF-8 mode ("u").
        $delimitedPattern = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match; 0 (no match) and false (error) both map to false.
        return \preg_match($delimitedPattern, $str) === 1;
    }
5982
5983
    /**
5984
     * Returns whether or not a character exists at an index. Offsets may be
5985
     * negative to count from the last character in the string. Implements
5986
     * part of the ArrayAccess interface.
5987
     *
5988
     * @param string $str      <p>The input string.</p>
5989 6
     * @param int    $offset   <p>The index to check.</p>
5990
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5991 6
     *
5992 3
     * @return bool whether or not the index exists
5993
     */
5994
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // Non-negative offsets must lie below the length; negative offsets
        // count from the end and may reach back as far as the first character.
        return $offset >= 0
            ? $offset < $charCount
            : \abs($offset) <= $charCount;
    }
6005
6006
    /**
6007
     * Returns the character at the given index. Offsets may be negative to
6008
     * count from the last character in the string. Implements part of the
6009
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
6010
     * does not exist.
6011
     *
6012
     * @param string $str      <p>The input string.</p>
6013
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
6014
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6015 2
     *
6016
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
6017
     *
6018 2
     * @return string the character at the specified index
6019
     */
6020 2
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Delegate the bounds check to str_offset_exists() so that the length
        // is measured in the requested $encoding, the same encoding that is
        // then used to fetch the character.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6035
6036
    /**
6037
     * Pad a UTF-8 string to given length with another string.
6038
     *
6039
     * @param string     $str        <p>The input string.</p>
6040
     * @param int        $pad_length <p>The length of return string.</p>
6041
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
6042
     * @param int|string $pad_type   [optional] <p>
6043
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
6044
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
6045 41
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
6046
     *                               </p>
6047
     * @param string     $encoding   [optional] <p>Default: UTF-8</p>
6048
     *
6049 41
     * @return string returns the padded string
6050 13
     */
6051 3
    public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = \STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
    {
        // An empty input is returned as-is (it is not padded).
        if ($str === '') {
            return '';
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        $str_length = (int) self::strlen($str, $encoding);

        // The target length is not larger than the input: nothing to add.
        if ($pad_length <= 0 || $pad_length < $str_length) {
            return $str;
        }

        $ps_length = (int) self::strlen($pad_string, $encoding);

        // Without any padding characters there is nothing to add; returning
        // early also avoids a division by zero in the repeat counts below.
        if ($ps_length === 0) {
            return $str;
        }

        $diff = $pad_length - $str_length;

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                // Repeat the pad string often enough, then trim it to the exact size.
                $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                $pre = (string) self::substr($pre, 0, $diff, $encoding);
                $post = '';

                break;

            case \STR_PAD_BOTH:
                // With an odd difference the extra character goes to the right side.
                $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                $pre = (string) self::substr($pre, 0, (int) \floor($diff / 2), $encoding);
                $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                $post = (string) self::substr($post, 0, (int) \ceil($diff / 2), $encoding);

                break;

            case \STR_PAD_RIGHT:
            default:
                $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                $post = (string) self::substr($post, 0, $diff, $encoding);
                $pre = '';
        }

        return $pre . $str . $post;
    }
6110
6111
    /**
6112
     * Returns a new string of a given length such that both sides of the
6113
     * string are padded. Alias for pad() with a $padType of 'both'.
6114
     *
6115
     * @param string $str
6116 11
     * @param int    $length   <p>Desired string length after padding.</p>
6117
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6118 11
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6119
     *
6120
     * @return string string with padding applied
6121
     */
6122
    public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        $totalPadding = $length - (int) self::strlen($str, $encoding);

        // With an odd amount the right side receives the extra character.
        $leftPadding = (int) \floor($totalPadding / 2);
        $rightPadding = (int) \ceil($totalPadding / 2);

        return self::apply_padding($str, $leftPadding, $rightPadding, $padStr, $encoding);
    }
6128
6129
    /**
6130
     * Returns a new string of a given length such that the beginning of the
6131
     * string is padded. Alias for pad() with a $padType of 'left'.
6132
     *
6133
     * @param string $str
6134 7
     * @param int    $length   <p>Desired string length after padding.</p>
6135
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6136
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6137
     *
6138
     * @return string string with left padding
6139
     */
6140
    public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure the input in the requested encoding (as str_pad_both() does)
        // so that the amount of padding matches the encoding used to apply it.
        $leftPadding = $length - (int) self::strlen($str, $encoding);

        return self::apply_padding($str, $leftPadding, 0, $padStr, $encoding);
    }
6144
6145
    /**
6146
     * Returns a new string of a given length such that the end of the string
6147
     * is padded. Alias for pad() with a $padType of 'right'.
6148
     *
6149
     * @param string $str
6150 7
     * @param int    $length   <p>Desired string length after padding.</p>
6151
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6152
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6153
     *
6154
     * @return string string with right padding
6155
     */
6156
    public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure the input in the requested encoding (as str_pad_both() does)
        // so that the amount of padding matches the encoding used to apply it.
        $rightPadding = $length - (int) self::strlen($str, $encoding);

        return self::apply_padding($str, 0, $rightPadding, $padStr, $encoding);
    }
6160
6161
    /**
6162
     * Repeat a string.
6163
     *
6164
     * @param string $str        <p>
6165
     *                           The string to be repeated.
6166
     *                           </p>
6167
     * @param int    $multiplier <p>
6168
     *                           Number of time the input string should be
6169
     *                           repeated.
6170
     *                           </p>
6171
     *                           <p>
6172
     *                           multiplier has to be greater than or equal to 0.
6173 9
     *                           If the multiplier is set to 0, the function
6174
     *                           will return an empty string.
6175 9
     *                           </p>
6176
     *
6177
     * @return string the repeated string
6178
     */
6179
    public static function str_repeat(string $str, int $multiplier): string
    {
        // Run the input through self::filter() first, then let the native
        // function do the repetition.
        return \str_repeat(self::filter($str), $multiplier);
    }
6185
6186
    /**
6187
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
6188
     *
6189
     * Replace all occurrences of the search string with the replacement string
6190
     *
6191
     * @see http://php.net/manual/en/function.str-replace.php
6192
     *
6193
     * @param mixed $search  <p>
6194
     *                       The value being searched for, otherwise known as the needle.
6195
     *                       An array may be used to designate multiple needles.
6196
     *                       </p>
6197
     * @param mixed $replace <p>
6198
     *                       The replacement value that replaces found search
6199
     *                       values. An array may be used to designate multiple replacements.
6200
     *                       </p>
6201
     * @param mixed $subject <p>
6202
     *                       The string or array being searched and replaced on,
6203
     *                       otherwise known as the haystack.
6204
     *                       </p>
6205
     *                       <p>
6206
     *                       If subject is an array, then the search and
6207
     *                       replace is performed with every entry of
6208
     *                       subject, and the return value is an array as
6209
     *                       well.
6210 60
     *                       </p>
6211
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
6212
     *
6213
     * @return mixed this function returns a string or an array with the replaced values
6214
     */
6215
    public static function str_replace($search, $replace, $subject, int &$count = null)
    {
        // Straight pass-through to the native function: all four arguments are
        // forwarded unchanged, and $count is handed on by reference so the
        // caller receives the number of replacements.
        /** @psalm-suppress PossiblyNullArgument */
        return \str_replace($search, $replace, $subject, $count);
    }
6220
6221
    /**
6222
     * Replaces $search from the beginning of string with $replacement.
6223
     *
6224 17
     * @param string $str         <p>The input string.</p>
6225 4
     * @param string $search      <p>The string to search for.</p>
6226 2
     * @param string $replacement <p>The replacement.</p>
6227
     *
6228
     * @return string string after the replacements
6229 2
     */
6230 2
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // Existing behaviour, kept for backward compatibility: an empty search
        // term appends the replacement to the input.
        if ($search === '') {
            return $str . $replacement;
        }

        // Compare only the leading bytes instead of searching the whole
        // string for the term and then checking whether the hit is at 0.
        $searchLength = \strlen($search);
        if (\strncmp($str, $search, $searchLength) === 0) {
            return $replacement . (string) \substr($str, $searchLength);
        }

        return $str;
    }
6252
6253
    /**
6254
     * Replaces $search from the ending of string with $replacement.
6255
     *
6256 17
     * @param string $str         <p>The input string.</p>
6257 4
     * @param string $search      <p>The string to search for.</p>
6258 2
     * @param string $replacement <p>The replacement.</p>
6259
     *
6260
     * @return string string after the replacements
6261 2
     */
6262 2
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // Existing behaviour, kept for backward compatibility: an empty search
        // term appends the replacement to the input.
        if ($search === '') {
            return $str . $replacement;
        }

        $strLength = \strlen($str);
        $searchLength = \strlen($search);

        // A search term longer than the input can never be its ending. Checking
        // this first also avoids handing strpos() a negative offset that lies
        // outside the string, which is an error on PHP 8.
        if ($searchLength > $strLength) {
            return $str;
        }

        // Compare only the trailing bytes with the search term.
        if (\substr($str, -$searchLength) === $search) {
            return \substr($str, 0, $strLength - $searchLength) . $replacement;
        }

        return $str;
    }
6284
6285
    /**
6286
     * Replace the first "$search"-term with the "$replace"-term.
6287
     *
6288
     * @param string $search
6289
     * @param string $replace
6290 2
     * @param string $subject
6291 2
     *
6292
     * @return string
6293 2
     *
6294
     * @psalm-suppress InvalidReturnType
6295
     */
6296 2
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $pos = self::strpos($subject, $search);
        if ($pos === false) {
            // Search term not found: nothing to replace.
            return $subject;
        }

        $replaced = self::substr_replace($subject, $replace, $pos, (int) self::strlen($search));

        // Static analysis reports that substr_replace() may return string[];
        // narrow the type explicitly so the declared string return type
        // always holds.
        return \is_string($replaced) ? $replaced : $subject;
    }
6306
6307
    /**
6308
     * Replace the last "$search"-term with the "$replace"-term.
6309
     *
6310
     * @param string $search
6311
     * @param string $replace
6312 2
     * @param string $subject
6313 2
     *
6314
     * @return string
6315 2
     *
6316
     * @psalm-suppress InvalidReturnType
6317
     */
6318 2
    public static function str_replace_last(string $search, string $replace, string $subject): string
    {
        $pos = self::strrpos($subject, $search);
        if ($pos === false) {
            // Search term not found: nothing to replace.
            return $subject;
        }

        $replaced = self::substr_replace($subject, $replace, $pos, (int) self::strlen($search));

        // Static analysis reports that substr_replace() may return string[];
        // narrow the type explicitly so the declared string return type
        // always holds.
        return \is_string($replaced) ? $replaced : $subject;
    }
6328
6329
    /**
6330
     * Shuffles all the characters in the string.
6331
     *
6332 5
     * PS: uses random algorithm which is weak for cryptography purposes
6333
     *
6334 5
     * @param string $str <p>The input string</p>
6335
     *
6336 5
     * @return string the shuffled string
6337 5
     */
6338 5
    public static function str_shuffle(string $str): string
    {
        $length = (int) self::strlen($str);

        // Zero or one character cannot be reordered. Returning early also
        // avoids range(0, -1), which would yield the descending list [0, -1]
        // rather than an empty one.
        if ($length <= 1) {
            return $str;
        }

        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // Rebuild the string by picking the characters in the shuffled order.
        // Iterate by value: the indexes are only read.
        $shuffledStr = '';
        foreach ($indexes as $i) {
            $tmpSubStr = self::substr($str, $i, 1);
            if ($tmpSubStr !== false) {
                $shuffledStr .= $tmpSubStr;
            }
        }

        return $shuffledStr;
    }
6354
6355
    /**
6356
     * Returns the substring beginning at $start, and up to, but not including
6357
     * the index specified by $end. If $end is omitted, the function extracts
6358
     * the remaining string. If $end is negative, it is computed from the end
6359
     * of the string.
6360
     *
6361
     * @param string $str
6362
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
6363
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
6364 18
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6365 6
     *
6366 12
     * @return false|string
6367 4
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
6368 8
     *                      characters long, <b>FALSE</b> will be returned.
6369 2
     */
6370
    public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
    {
        // An end index at or before the start selects nothing.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null) {
            // No end given: allow up to the full length. The length is measured
            // in the requested encoding, the same one used for the extraction.
            $length = (int) self::strlen($str, $encoding);
        } elseif ($end < 0) {
            // A negative end is counted back from the end of the string.
            $length = (int) self::strlen($str, $encoding) + $end - $start;
        } else {
            $length = $end - $start;
        }

        return self::substr($str, $start, $length, $encoding);
    }
6384
6385
    /**
6386
     * Convert a string to e.g.: "snake_case"
6387 20
     *
6388 20
     * @param string $str
6389
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6390 20
     *
6391 20
     * @return string string in snake_case
6392
     */
6393
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        $normalized = \str_replace('-', '_', self::normalize_whitespace($str));

        // Put "_" in front of every upper-case ASCII letter (lower-casing it)
        // and around every digit.
        // NOTE(review): the "|" inside the character class is matched literally — confirm that is intended.
        $withSeparators = (string) \preg_replace_callback(
            '/([\d|A-Z])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $token = $matches[1];

                // Only a digit survives the int round-trip unchanged.
                if ($token === (string) (int) $token) {
                    return '_' . $token . '_';
                }

                return '_' . self::strtolower($token, $encoding);
            },
            $normalized
        );

        $patterns = [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ];
        $replacements = [
            '_',
            '',
            '_',
        ];
        $collapsed = (string) \preg_replace($patterns, $replacements, $withSeparators);

        // Strip leading & trailing "_" first, then leading & trailing whitespace.
        return self::trim(self::trim($collapsed, '_'));
    }
6436
6437
    /**
     * Sort all characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses duplicate values
            $flipped = \array_flip($codePoints);
            $codePoints = \array_flip($flipped);
        }

        // both sorts keep the keys and order by value
        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
6462
6463
    /**
     * Alias for "UTF8::split()": forwards both arguments unchanged.
     *
     * @see UTF8::split()
     *
     * @param string|string[] $str <p>The input, passed through to UTF8::split().</p>
     * @param int             $len <p>Passed through to UTF8::split(). Default: 1</p>
     *
     * @return string[]
     */
    public static function str_split($str, int $len = 1): array
    {
        $chunks = self::split($str, $len);

        return $chunks;
    }
6477
6478
    /**
     * Splits the string at every occurrence of $pattern and returns the
     * pieces. The pattern is quoted via preg_quote(), so it is matched
     * literally. An optional integer $limit will truncate the results.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>The delimiter with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings; empty if $limit is 0 or if splitting fails
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        // an empty pattern is never handed to the regex engine:
        // the input comes back as the single result
        if ($pattern === '') {
            return [$str];
        }

        // preg_split() puts the unsplit remainder into the last slot when a
        // limit is given, so one extra piece is requested and dropped below
        $pregLimit = $limit > 0 ? $limit + 1 : -1;

        $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $pregLimit);
        if ($pieces === false) {
            return [];
        }

        $limitWasReached = $pregLimit > 0 && \count($pieces) === $pregLimit;
        if ($limitWasReached) {
            \array_pop($pieces);
        }

        return $pieces;
    }
6521
6522
    /**
     * Check if the string starts with the given substring.
     *
     * - case-sensitive, byte-wise comparison
     * - an empty $haystack or an empty $needle always yields false
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // compare only the leading bytes instead of scanning the whole
        // haystack for the needle (which is what strpos() does on a miss)
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
6538
6539
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so no reference is needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6567
6568
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for the length / substr
        // calls below, otherwise the offsets do not line up.
        // NOTE(review): this replaces str_index_first(), which was called
        // without $encoding; it is assumed to be a plain first-occurrence
        // search like strpos() - confirm.
        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6599
6600
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for the length / substr
        // calls below, otherwise the offsets do not line up.
        // NOTE(review): this replaces str_index_last(), which was called
        // without $encoding; it is assumed to be a plain last-occurrence
        // search like strrpos() - confirm.
        $offset = self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6631
6632
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for the substr call
        // below, otherwise the offset does not line up.
        // NOTE(review): this replaces str_index_first(), which was called
        // without $encoding; it is assumed to be a plain first-occurrence
        // search like strpos() - confirm.
        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            0,
            $offset,
            $encoding
        );
    }
6663
6664
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for the substr call
        // below, otherwise the offset does not line up.
        // NOTE(review): this replaces str_index_last(), which was called
        // without $encoding; it is assumed to be a plain last-occurrence
        // search like strrpos() - confirm.
        $offset = self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            0,
            $offset,
            $encoding
        );
    }
6695
6696
    /**
     * Gets the part of the string after (or, with "$beforeNeedle", before)
     * the first occurrence of "$needle", as produced by UTF8::strstr().
     *
     * An empty string is returned if $str or $needle is empty, or if
     * UTF8::strstr() reports no match.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6728
6729
    /**
     * Gets the part of the string after (or, with "$beforeNeedle", before)
     * the last occurrence of "$needle", as produced by UTF8::strrchr().
     *
     * An empty string is returned if $str or $needle is empty, or if
     * UTF8::strrchr() reports no match.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6756
6757
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return $substring . $str . $substring;
    }
6769
6770
    /**
     * Returns a string with the first letter of each word capitalized and
     * the rest of the word lower-cased. The input is trimmed first unless
     * $useTrimFirst is false. Also accepts an array, $ignore, allowing you
     * to list words that are left untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($useTrimFirst === true) {
            $str = self::trim($str);
        }

        // null and [] both mean "nothing to ignore"
        $hasIgnoreList = (bool) $ignore;

        $titleized = '';
        foreach (self::str_to_words($str) as $word) {
            if ($hasIgnoreList && \in_array($word, $ignore, true)) {
                // ignored words are copied over as they are
                $titleized .= $word;

                continue;
            }

            $lowered = self::strtolower(
                $word,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );

            $titleized .= self::str_upper_first(
                $lowered,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $titleized;
    }
6825
6826
    /**
     * Returns a trimmed string in proper title case.
     *
     * "Small words" (a, an, and, ...) are lower-cased unless they start or
     * end the title, a sub-sentence or an inserted sub-phrase, or are part
     * of certain hyphenated compounds. URLs, domains, e-mail addresses, file
     * paths and words with internal capitals (e.g. "iPhone") are preserved.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are merged into the small-word alternation of
     * the regular expressions below, so they are used as regex fragments.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // built-in small words, extended by the caller's list
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-cases the first letter of capture group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $capitalizeFirstGroup = static function (array $matches) use ($encoding): string {
            return self::str_upper_first($matches[1], $encoding);
        };

        /**
         * Keeps capture group 1 and upper-cases the first letter of group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $capitalizeSecondGroup = static function (array $matches) use ($encoding): string {
            return $matches[1] . self::str_upper_first($matches[2], $encoding);
        };

        $str = self::trim($str);

        // input without any lower-case letter is lower-cased as a whole first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = self::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // leading and trailing underscores are kept around the word
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            $capitalizeSecondGroup,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            $capitalizeFirstGroup,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $capitalizeFirstGroup,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            $capitalizeSecondGroup,
            $str
        );

        return $str;
    }
6995
6996
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of $str are read as one big-endian number and written in
     * base 2 without leading zeros (e.g. "a" => "1100001"); an empty or
     * all-zero input yields "0".
     *
     * The conversion is done byte by byte, because base_convert() goes
     * through a float and silently loses precision once the input is longer
     * than a few bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        $bits = '';
        $byteCount = \strlen($str);

        for ($i = 0; $i < $byteCount; ++$i) {
            // every byte contributes exactly 8 binary digits
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // same shape as a base conversion: no leading zeros, but never empty
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7009
7010
    /**
     * Split a string into its lines.
     *
     * "\r\n", "\r" and "\n" each count as exactly one line break, so
     * consecutive line breaks produce empty lines in the result (unless
     * they are filtered out via $removeEmptyValues / $removeShortValues).
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // "\r\n" is listed first so it is consumed as one break; a class like
        // [\r\n]{1,2} would also swallow "\n\n" and lose the empty line
        $lines = \preg_split('/\r\n|\r|\n/u', $str);
        if ($lines === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        // nothing to filter: hand back the raw split result
        if ($removeShortValues === null && $removeEmptyValues === false) {
            return $lines;
        }

        return self::reduce_string_array(
            $lines,
            $removeEmptyValues,
            $removeShortValues
        );
    }
7042
7043
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result holds
     * the words as well as the text between them.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // character class for word characters: letters plus the extra chars
        $wordClass = self::rxClass($charList, '\pL');

        $parts = \preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        // nothing to filter: hand back the raw split result
        if ($removeShortValues === null && $removeEmptyValues === false) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $removeEmptyValues,
            $removeShortValues
        );

        // make sure every remaining entry is a string; keys are kept
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
7086
7087
    /**
     * Alias for "UTF8::to_ascii()": forwards all arguments unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown <p>Passed through to UTF8::to_ascii(). Default: '?'</p>
     * @param bool   $strict  <p>Passed through to UTF8::to_ascii(). Default: false</p>
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7102
7103
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring itself is longer than $length it cannot fit: the
     * string is then cut to $length characters and the substring is left
     * out, so the result never exceeds the desired length. A $length of
     * zero or less yields an empty string.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // Need to further trim the string so we can append the substring
        $keepLength = $length - (int) self::strlen($substring, $encoding);

        if ($keepLength < 0) {
            // The substring does not fit. A negative length must not reach
            // substr(): presumably (as with mb_substr()) it would cut from
            // the end instead and return more than $length characters.
            return (string) self::substr($str, 0, \max($length, 0), $encoding);
        }

        return ((string) self::substr($str, 0, $keepLength, $encoding)) . $substring;
    }
7134
7135
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: UTF-8</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        // short enough already: nothing to cut, nothing to append
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= (int) self::strlen($substring, $encoding);

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // a space right at the cut position means no word was split
        $spaceAtCut = self::strpos($str, ' ', $length - 1, $encoding);
        if ($spaceAtCut === $length) {
            return $head . $substring;
        }

        // the last word was truncated:
        // find pos of the last occurrence of a space, get up to that
        $lastSpace = self::strrpos($head, ' ', 0, $encoding);

        $cutBackToLastSpace = $lastSpace !== false
            || ($spaceAtCut !== false && $ignoreDoNotSplitWordsForOneWord === false);

        if ($cutBackToLastSpace) {
            // (int) false === 0, i.e. the whole head is dropped in that case
            $head = (string) self::substr($head, 0, (int) $lastSpace, $encoding);
        }

        return $head . $substring;
    }
7187
7188
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is UTF8::str_delimit() with "_" as the delimiter.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
7202
7203
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized via UTF8::str_camelize() first, then its first
     * character is upper-cased via UTF8::str_upper_first().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $camelized = self::str_camelize($str, $encoding);

        return self::str_upper_first(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
7220
7221
    /**
     * Alias for "UTF8::ucfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        return self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
7238
7239
    /**
     * Counts the words of a UTF-8 string, or returns the words themselves.
     *
     * The string is split via UTF8::str_to_words(); this method reads the
     * entries at odd indexes of that result as words and the entries at even
     * indexes as the text between them.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @return int|string[] the number of words, or the list of words (depending on $format)
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        if ($format === 1) {
            // plain list of words
            $words = [];
            $index = 1;
            while ($index < $partCount) {
                $words[] = $parts[$index];
                $index += 2;
            }

            return $words;
        }

        if ($format === 2) {
            // words keyed by their character offset in the input
            $wordsByOffset = [];
            $position = (int) self::strlen($parts[0]);
            $index = 1;
            while ($index < $partCount) {
                $wordsByOffset[$position] = $parts[$index];
                // advance past the word itself and the text that follows it
                $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
                $index += 2;
            }

            return $wordsByOffset;
        }

        return (int) (($partCount - 1) / 2);
    }
7276
7277
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp() - both strings are
     * case-folded via UTF8::strtocasefold() before they are compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
7298
7299
    /**
     * Alias for "UTF8::strstr()": all arguments are forwarded unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        return self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
7316
7317
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings short-circuit to 0. Otherwise both strings are
     * normalized to NFD before the byte-wise comparison. If a string cannot
     * be normalized (e.g. it is not valid UTF-8), its raw bytes are compared
     * instead.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @noinspection PhpUndefinedClassInspection */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @noinspection PhpUndefinedClassInspection */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() does not return a string on failure; with
        // strict_types enabled, passing that value on to \strcmp() would
        // raise a TypeError, so fall back to the original input.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
7336
7337
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $charList <p>The characters that end the initial segment.</p>
     * @param int|null $offset   [optional] <p>Start of the part of $str that is examined.</p>
     * @param int|null $length   [optional] <p>Length of the part of $str that is examined.</p>
     *
     * @return int the length of the leading segment that contains no char from $charList
     */
    public static function strcspn(string $str, string $charList, int $offset = null, int $length = null): int
    {
        // an empty mask is answered with the length of the whole (un-sliced) input
        if ($charList === '') {
            return (int) self::strlen($str);
        }

        $hasRange = $offset !== null || $length !== null;
        if ($hasRange) {
            /** @noinspection UnnecessaryCastingInspection */
            $slice = self::substr($str, (int) $offset, $length);
            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char of the mask
        $captured = [];
        $found = \preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $captured);
        if (!$found) {
            return (int) self::strlen($str);
        }

        $prefixLength = self::strlen($captured[1]);

        return $prefixLength === false ? 0 : $prefixLength;
    }
7378
7379
    /**
     * Alias for "UTF8::stristr()": all arguments are forwarded unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        return self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
7396
7397
    /**
     * Create a UTF-8 string from code points.
     *
     * Every array entry is converted with UTF8::chr() and the results are
     * concatenated in array order.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $toChar = [self::class, 'chr'];
        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
7419
7420
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The input is compared against every key of the static BOM table
     * (BOM bytes => byte length); only the keys are needed here.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value over the keys: the former by-reference loop over
        // the shared static table was never needed (the value is unused) and
        // the reference was never unset.
        foreach (\array_keys(self::$BOM) as $bomString) {
            if (\strpos($str, (string) $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
7440
7441
    /**
     * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        // null means "allow nothing" and is handed to the native function as ''
        /** @noinspection UnnecessaryCastingInspection */
        $allowed = (string) $allowable_tags;

        return \strip_tags($input, $allowed);
    }
7474
7475
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input without any whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        // a failed replacement (null) is reported as an empty string
        return (string) $stripped;
    }
7492
7493
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * The search tries mbstring first, then intl (UTF-8 and non-negative
     * offset only), then the native function for pure ASCII input, and
     * finally a case-folded UTF8::strpos() lookup. A backend that reports
     * "not found" does not end the search; the next one is tried.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            $position = \mb_stripos($haystack, $needle, $offset, $encoding);
            if ($position !== false) {
                return $position;
            }
        }

        // 2) intl: "grapheme_stripos()" can't handle other encodings
        //    and can't handle a negative offset
        $intlUsable = $encoding === 'UTF-8'
                      &&
                      $offset >= 0
                      &&
                      self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        // 3) ascii only
        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        // 4) vanilla php: compare the case-folded variants
        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
7566
7567
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case insensitive).
     *
     * Uses mbstring if available; otherwise intl (UTF-8 only), then the
     * native function for pure ASCII input, and finally a regex fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if cleaning removed all of it.
        // Compare strictly: a loose "!$needle" check would also treat the
        // valid needle "0" as empty and return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        if (self::is_ascii($needle) && self::is_ascii($haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // regex fallback: lazily capture everything in front of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the captured prefix, keep the needle and everything after it
        return self::substr($haystack, (int) self::strlen($match[1]));
    }
7645
7646
    /**
     * Get the string length, not the byte-length!
     *
     * The backends are tried in this order: byte length (CP850 / ASCII),
     * mbstring, iconv, intl (UTF-8 only), native strlen for pure ASCII input
     * and finally a regex based count.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // binary || ascii only: the byte length is the answer
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::strlen_in_byte($str);
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $noEncodingAwareBackend = self::$SUPPORT['mbstring'] === false
                                  &&
                                  self::$SUPPORT['iconv'] === false;
        if ($encoding !== 'UTF-8' && $noEncodingAwareBackend) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // via mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            $length = \mb_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        // via iconv
        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        // via intl: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        // ascii only
        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        // via vanilla php: count the matches of "any single character"
        \preg_match_all('/./us', $str, $chars);
        $length = \count($chars[0]);

        return $length === 0 ? false : $length;
    }
7763
7764 192
    /**
     * Get string length in byte.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int the number of bytes
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it
        // with an 8-bit encoding to count the bytes
        return \mb_strlen($str, 'CP850');
    }
7788
7789
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp() - both strings are
     * case-folded via UTF8::strtocasefold() before UTF8::strnatcmp() is used.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
7810
7811
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp() - identical strings
     * short-circuit to 0, otherwise both strings are passed through
     * UTF8::strtonatfold() before the native comparison.
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 . '' === $str2 . '') {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
7830
7831
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then
     * compared with UTF8::strncmp().
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
7854
7855
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut to their first $len characters via UTF8::substr()
     * and then compared with UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     * @param int    $len  <p>Number of characters to use in the comparison.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(string $str1, string $str2, int $len): int
    {
        $head1 = (string) self::substr($str1, 0, $len);
        $head2 = (string) self::substr($str2, 0, $len);

        return self::strcmp($head1, $head2);
    }
7876
7877
    /**
     * Search a string for any of a set of characters.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // byte position of the matched character, used for a byte-wise cut
        $bytePos = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $bytePos);
    }
7899
7900
    /**
     * Find position of first occurrence of string in a string.
     *
     * The backends are tried in this order: byte-wise search (CP850 / ASCII),
     * mbstring, intl (UTF-8 and non-negative offset only), iconv
     * (non-negative offset only), native strpos for pure ASCII input and
     * finally a manual byte search that is converted back into a character
     * position. A backend that reports "not found" does not end the search;
     * the next one is tried.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strpos_in_byte($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack. Translate it
        // into the absolute character position first, so that the position
        // found in the truncated haystack can be mapped back onto the full
        // string (previously the offset was reset to 0 *after* truncation,
        // which produced a position relative to the tail).
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        if ($haystackIsAscii) {
            $haystackTmp = \substr($haystack, $offset);
        } else {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        }
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position inside the truncated haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character count and
            // shift it by the part that was cut off in front
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset + 0;
    }
8050
8051
    /**
     * Byte-wise lookup of the first occurrence of a string inside another string.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The string to search for.</p>
     * @param int    $offset   [optional] <p>The search offset, 0 by default.</p>
     *
     * @return false|int the position of the first occurrence of needle in haystack,
     *                   or false if needle was not found or one of the strings is empty
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ('' === $needle || '' === $haystack) {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        // With function overloading the "mb_" functions are available, so the
        // lookup goes through mb_strpos() with the 8-bit "CP850" charset.
        return $overloadActive
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
8084
8085
    /**
     * Returns the portion of haystack around the last occurrence of needle.
     *
     * With mbstring the whole needle is handed to mb_strrchr(). The iconv and
     * vanilla fallbacks reduce the needle to its first character before searching.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              true: return everything before the last occurrence of needle.<br>
     *                              false: return everything from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack, or false if needle is not found
     */
    public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ('' === $needle || '' === $haystack) {
            return false;
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8) {
            // invalid characters in front of the needle would shift the
            // positions reported by "mb_strpos()" and "iconv_strpos()"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($isByteEncoding && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        //
        // fallback via iconv, or via vanilla php if iconv is missing:
        // both only look for the first character of the needle
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8198
8199 10
    /**
     * Builds a copy of the string with its characters in reverse order.
     *
     * The string is walked from the last character index down to 0 and each
     * character is fetched via UTF8::substr(); positions for which substr()
     * reports false are skipped.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str): string
    {
        if ('' === $str) {
            return '';
        }

        $result = '';
        for ($index = (int) self::strlen($str) - 1; $index >= 0; --$index) {
            $char = self::substr($str, $index, 1);
            if ($char === false) {
                continue;
            }

            $result .= $char;
        }

        return $result;
    }
8223
8224
    /**
     * Case-insensitive variant of strrchr(): returns the portion of haystack
     * around the last occurrence of needle.
     *
     * With mbstring the whole needle is handed to mb_strrichr(). The vanilla
     * fallback reduces the needle to its first character before searching.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              true: return everything before the last occurrence of needle.<br>
     *                              false: return everything from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ('' === $needle || '' === $haystack) {
            return false;
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8) {
            // invalid characters in front of the needle would shift the
            // positions reported by "mb_strpos()" and "iconv_strpos()"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php (first character of the needle only)
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }

        $lastPos = self::strripos($haystack, (string) $firstChar, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8294
8295
    /**
     * Find the position of the last occurrence of a string, ignoring case.
     *
     * Backends are tried in this order: byte-wise search for "CP850" / "ASCII",
     * mbstring, intl (UTF-8 with non-negative offset only), byte-wise search for
     * pure ASCII input, and finally case folding followed by UTF8::strrpos().
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for. A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ('' === $haystack) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ('' === $needle) {
            return false;
        }

        if ($cleanUtf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        $intlUsable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            && $offset >= 0 // grapheme_strripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $intlPos = \grapheme_strripos($haystack, $needle, $offset);
            if ($intlPos !== false) {
                return $intlPos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: fold the case of both strings, then search case-sensitively
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
    }
8400
8401
    /**
     * Byte-wise, case-insensitive lookup of the last occurrence of a string inside another string.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The string to search for.</p>
     * @param int    $offset   [optional] <p>The position in haystack to start searching.</p>
     *
     * @return false|int the position of the last occurrence of needle in haystack,
     *                   or false if needle was not found or one of the strings is empty
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ('' === $needle || '' === $haystack) {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        // With function overloading the "mb_" functions are available, so the
        // lookup goes through mb_strripos() with the 8-bit "CP850" charset.
        return $overloadActive
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
8436
8437
    /**
     * Find position of last occurrence of a string in a string.
     *
     * Backends are tried in this order: byte-wise search for "CP850" / "ASCII",
     * mbstring, intl (UTF-8 with non-negative offset only), byte-wise search for
     * pure ASCII input, and finally a vanilla php fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (
            self::is_ascii($haystack)
            &&
            self::is_ascii($needle)
        ) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the searched region first: a positive offset
        // drops the leading characters (and is added back to the result below),
        // a negative offset drops that many characters from the end.
        $haystackTmp = null;
        if ($offset > 0) {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        $pos = self::strrpos_in_byte($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // Convert the byte position into a character position by measuring
        // the length of everything in front of the match.
        $strTmp = self::substr_in_byte($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        return $offset + (int) self::strlen($strTmp, $encoding);
    }
8577
8578
    /**
     * Byte-wise lookup of the last occurrence of a string inside another string.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The string to search for.</p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int the position of the last occurrence of needle in haystack,
     *                   or false if needle was not found or one of the strings is empty
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ('' === $needle || '' === $haystack) {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        // With function overloading the "mb_" functions are available, so the
        // lookup goes through mb_strrpos() with the 8-bit "CP850" charset.
        return $overloadActive
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
8612
8613
    /**
     * Measures the leading run of characters in a string that all belong to a given mask.
     *
     * When an offset or a length is given, the string is first reduced to that
     * part via UTF8::substr() and the run is measured on the result.
     *
     * @param string   $str    <p>The input string.</p>
     * @param string   $mask   <p>The mask of chars</p>
     * @param int      $offset [optional] <p>Start of the examined part, forwarded to UTF8::substr().</p>
     * @param int|null $length [optional] <p>Length of the examined part, forwarded to UTF8::substr().</p>
     *
     * @return int the length of the matching initial segment, 0 if nothing matches
     *             or if the (reduced) string or the mask is empty
     */
    public static function strspn(string $str, string $mask, int $offset = 0, int $length = null)
    {
        if ($offset !== 0 || $length !== null) {
            $str = (string) self::substr($str, $offset, $length);
        }

        if ('' === $mask || '' === $str) {
            return 0;
        }

        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0]);
    }
8638
8639
    /**
     * Returns the part of haystack starting at the first occurrence of needle,
     * or the part in front of it.
     *
     * Backends are tried in this order: byte-wise search for "CP850" / "ASCII",
     * mbstring, intl (UTF-8 only), byte-wise search for pure ASCII input, and
     * finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
    {
        if ('' === $needle || '' === $haystack) {
            return false;
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift the
            // positions reported by "mb_strpos()" and "iconv_strpos()"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via intl
        //

        $intlUsable = $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
            && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $intlResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($intlResult !== false) {
                return $intlResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the first needle
        //

        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $found);

        if (isset($found[1]) === false) {
            return false;
        }

        $prefix = $found[1];
        if ($before_needle) {
            return $prefix;
        }

        return self::substr($haystack, (int) self::strlen($prefix));
    }
8742
8743
    /**
     * Byte-wise lookup of the first occurrence of a string within another,
     * returning a portion of the haystack.
     *
     * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              true: return everything before the first occurrence of needle.<br>
     *                              false: return everything from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string the portion of haystack, or false if needle is not found
     *                      or one of the strings is empty
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        if ('' === $needle || '' === $haystack) {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        // With function overloading the "mb_" functions are available, so the
        // lookup goes through mb_strstr() with the 8-bit "CP850" charset.
        return $overloadActive
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
8782
8783
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through UTF8::fixStrCaseHelper() and then
     * converted with UTF8::strtolower() or UTF8::strtoupper(), depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string the case-folded string, or an empty string for empty input
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ('' === $str) {
            return '';
        }

        $prepared = self::fixStrCaseHelper($str, $lower, $full);

        // only a strict boolean true selects the lowercase variant
        return $lower === true
            ? self::strtolower($prepared, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($prepared, $encoding, $cleanUtf8, $lang);
    }
8821
8822
    /**
     * Make a string lowercase.
     *
     * INFO: If "$lang" is given and the "intl" extension is available, the conversion is done
     *       via "transliterator_transliterate()" and "$encoding" is not used for the conversion.
     *       In every other case (also if the transliteration fails) "mb_strtolower()" is used.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Lower';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);

                // "transliterator_transliterate()" returns false on failure, returning that value
                // would violate the "string" return type, so we use the generic fallback instead
                if ($transliterated !== false) {
                    return $transliterated;
                }
            } else {
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
8883
8884
    /**
     * Make a string uppercase.
     *
     * INFO: If "$lang" is given and the "intl" extension is available, the conversion is done
     *       via "transliterator_transliterate()" and "$encoding" is not used for the conversion.
     *       In every other case (also if the transliteration fails) "mb_strtoupper()" is used.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, only the language specific transliterator is missing
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);

                // "transliterator_transliterate()" returns false on failure, returning that value
                // would violate the "string" return type, so we use the generic fallback instead
                if ($transliterated !== false) {
                    return $transliterated;
                }
            } else {
                \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
8945
8946
    /**
     * Translate characters or replace sub-strings.
     *
     * INFO: If "$to" is an empty string and "$from" is a string, every occurrence of "$from" is
     *       removed from the input. If "$to" is an empty string and "$from" is an array, "$from"
     *       is used as replace-pairs for the native "strtr()".
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the from- and to-characters cannot be combined into a replace map
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // use only as many characters as both sides can provide
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the result in its own variable, so that the error message
            // can still show the characters instead of the failed result
            $replacePairs = \array_combine($fromChars, $toChars);
            if ($replacePairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            return \strtr($str, $replacePairs);
        }

        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
8993
8994
    /**
     * Return the width of a string.
     *
     * INFO: Without "mbstring" the characters that are matched by the internal
     *       wide-character pattern are counted with a width of two, all other
     *       characters with a width of one.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // preferred way: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strwidth($str, $encoding);
        }

        // vanilla php: the pattern below needs UTF-8 input
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove all wide characters and remember how many of them were found
        $wideCharCount = 0;
        $narrowChars = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCharCount);

        // wide characters count twice, the remaining characters once
        return (2 * $wideCharCount) + (int) self::strlen($narrowChars, 'UTF-8');
    }
9044
9045
    /**
     * Get part of a string.
     *
     * INFO: The implementations are tried in this order: byte-wise (CP850 / ASCII),
     *       mbstring, intl (grapheme), iconv, native "substr()" for ASCII-only strings
     *       and finally a pure PHP implementation based on "UTF8::split()".
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str       <p>The string being checked.</p>
     * @param int      $offset    <p>The first position used in str.</p>
     * @param int|null $length    [optional] <p>The maximum length of the returned string, null === until the end
     *                            of the string.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>offset</i> is behind the end of <i>str</i>,
     *                      an empty string is returned. <b>FALSE</b> is only returned if the length of
     *                      <i>str</i> cannot be determined (e.g.: invalid chars + mbstring not installed).</p>
     */
    public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return '';
        }

        // Empty string
        if ($length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // Whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::substr_in_byte($str, $offset, $length);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // "2147483647" fakes "until the end of the string" without the need of "strlen"
            $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
            if ($return !== false) {
                return $return;
            }
        }

        // otherwise we need the string-length and can't fake it via "2147483647"
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // Empty string
        //
        // INFO: "$length === 0" was already handled at the top, so that only "null"
        //       is left as "no length given" and we can use a strict comparison here
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // Impossible
        if ($offset && $offset > $str_length) {
            // "false" is the php native return type here,
            //  but we optimized this for performance ... see "2147483647" instead of "strlen"
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
9201
9202
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * INFO: If an offset or a length is given, the first string is reduced to the requested
     *       part and the second string is cut to the length of that part before both
     *       strings are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
    {
        $needsSlicing = ($offset !== 0 || $length !== null);

        if ($needsSlicing) {
            $str1 = (string) self::substr($str1, $offset, $length);

            // compare only as many characters as the selected part of the main string has
            $compareLength = (int) self::strlen($str1);
            $str2 = (string) self::substr($str2, 0, $compareLength);
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2)
            : self::strcmp($str1, $str2);
    }
9237
9238
    /**
     * Count the number of substring occurrences.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring. It outputs a warning if the offset plus the length is
     *                            greater than the haystack length.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences, or <b>FALSE</b> if the haystack or the needle is an
     *                   empty string, if the length of the haystack cannot be determined or (before PHP 7.1)
     *                   if offset + length is not a positive value.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                // use the same encoding as the "substr()" call below,
                // otherwise the default length is wrong for non UTF-8 input
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            // reduce the haystack to the requested part, so that we can count in the whole string afterwards
            $haystack = (string) self::substr($haystack, $offset, $length, $encoding);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
9325
9326
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
    {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // without "mbstring.func_overload" the native function can be used directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // with "mbstring.func_overload": cut the haystack first and count in the remaining part
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $isInvalidRange = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
            if (
                $isInvalidRange
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            // the string cast also converts a "false" result into an empty string
            $haystack = (string) self::substr_in_byte($haystack, $offset, $length);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
9403
9404
    /**
     * Returns the number of occurrences of $substring in the given string.
     * The search is case-sensitive by default, pass false as $caseSensitive
     * for a case-insensitive search.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
    {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // only a fallback to prevent BC in the api ...
        // (a non-bool value at this position is used as encoding)
        /** @psalm-suppress RedundantConditionGivenDocblockType */
        if (!($caseSensitive === false || $caseSensitive === true)) {
            $encoding = (string) $caseSensitive;
        }

        if (!$caseSensitive) {
            // fold both strings the same way, so that the case does not matter anymore
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

        return (int) $occurrences;
    }
9435
9436
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not
     *                start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) self::substr($haystack, $prefixLength);
    }
9460
9461
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, the result of the used native function
     *                      is returned as it is.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty input or empty result
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if ($length === null && !$offset) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // no length given: use a very large value to read until the end of the string
        $maxLength = $length ?? 2147483647;

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $maxLength, 'CP850'); // 8-BIT
        }

        return \substr($str, $offset, $maxLength);
    }
9500
9501
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not
     *                end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) self::substr($haystack, 0, $keepLength);
    }
9525
9526
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not
     *                start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) self::substr($haystack, $prefixLength);
    }
9550
9551
    /**
     * Replace text within a portion of a string (multi-byte aware counterpart of "\substr_replace()").
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, every element is processed on its own and an array is returned;
     * $replacement, $offset and $length may then be arrays as well (they are cut to the size of $str),
     * or scalars (they are then applied to every element).
     *
     * NOTE: In array mode a $length of null is turned into 0 for every element, i.e. the replacement
     * is inserted at $offset. In string mode null means "replace up to the end of the string".
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to the length
     *                                     of the string; i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
    {
        if (\is_array($str) === true) {
            $num = \count($str);

            // "array_pad([$x], 0, $x)" still contains one element, so without this guard an empty
            // input array would come back with one phantom entry
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries fall back to 0)
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (null entries become 0, other non-integer entries become $num)
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    if ($valueTmpV2 !== null) {
                        $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                    } else {
                        $valueTmpV2 = 0;
                    }
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call per element; the closure forwards the requested encoding,
            // which a plain callable reference would silently drop
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array combined with a single string: only the first entry is used
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        // pure ASCII input: the native byte-based function is sufficient
        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // negative length: stop that many characters before the end of the string
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            return (string) self::substr($str, 0, $offset, $encoding) .
                   $replacement .
                   (string) self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
        }

        // fallback without mbstring: split both strings into characters and splice the arrays
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
9696
9697
    /**
     * Remove a suffix ($needle) from the end of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to strip.</p>
     *
     * @return string the haystack without the trailing needle; the unchanged haystack
     *                if it is empty, the needle is empty or it does not end with the needle
     */
    public static function substr_right(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_ends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) self::substr($haystack, 0, $keepLength);
    }
9721
9722
    /**
     * Returns a case swapped version of the string.
     *
     * Lowercase characters become uppercase and vice versa; characters without case are kept.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // remove invalid byte sequences first, so that the case conversion sees well-formed input
            $str = self::clean($str);
        }

        $lower = self::strtolower($str, $encoding);
        $upper = self::strtoupper($str, $encoding);

        // Fast path: "lower ^ upper ^ original" yields the swapped case byte by byte, because for every
        // byte either the lower or the upper variant equals the original. This only works while all
        // three strings have the same byte length; PHP truncates a string-XOR to the shortest operand.
        $byteLength = self::strlen_in_byte($str);
        if (
            self::strlen_in_byte($lower) === $byteLength
            &&
            self::strlen_in_byte($upper) === $byteLength
        ) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // Slow path: at least one character changes its byte length when its case changes
        // (e.g. "ŉ" -> "ʼN"), so swap the case character by character instead.
        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // not splittable as UTF-8 (e.g. other encoding or invalid bytes): keep the previous behaviour
            return (string) ($lower ^ $upper ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $charLower = self::strtolower($char, $encoding);
            $swapped .= ($charLower === $char) ? self::strtoupper($char, $encoding) : $charLower;
        }

        return $swapped;
    }
9749
9750
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function of the "mbstring" or "iconv" extension exists
     * although the extension itself is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringPolyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconvPolyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstringPolyfilled || $iconvPolyfilled;
    }
9773 6
9774
    /**
     * Replace every tab character with a fixed number of spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces per tab; values below 1 simply remove the tabs.</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        // "str_repeat()" rejects a negative multiplier (warning in PHP 7, ValueError in PHP 8)
        $spaces = $tabLength > 0 ? \str_repeat(' ', $tabLength) : '';

        return \str_replace("\t", $spaces, $str);
    }
9784
9785
    /**
     * Title-case the string: the first character of each word becomes uppercase,
     * all other characters become lowercase.
     *
     * The work is delegated to "UTF8::str_titleize()"; this method only normalizes the encoding first.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // "UTF-8" and "CP850" are passed through as they are, everything else gets normalized
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return self::str_titleize(
            $str,
            null,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength,
            false
        );
    }
9805
9806
    /**
     * Alias for "UTF8::to_ascii()": all arguments are forwarded unchanged.
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Forwarded as the "$unknown" argument of "to_ascii()".</p>
     * @param bool   $strict    [optional] <p>Forwarded as the "$strict" argument of "to_ascii()".</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        return self::to_ascii($str, $subst_chr, $strict);
    }
9823
9824
    /**
     * Alias for "UTF8::to_iso8859()": the argument is forwarded unchanged.
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[] whatever "to_iso8859()" returns for the given input
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        return self::to_iso8859($str);
    }
9839
9840
    /**
     * Alias for "UTF8::to_latin1()": the argument is forwarded unchanged.
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[] whatever "to_latin1()" returns for the given input
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        return self::to_latin1($str);
    }
9855
9856
    /**
     * Alias for "UTF8::to_utf8()": only the input is forwarded, so the optional
     * second argument of "to_utf8()" keeps its default value.
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[] whatever "to_utf8()" returns for the given input
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        return self::to_utf8($str);
    }
9871
9872
    /**
     * Convert a string into ASCII.
     *
     * Processing order:
     * 1. ASCII-only input is returned as it is.
     * 2. The string is cleaned via "UTF8::clean()" and checked again.
     * 3. In strict mode (and with "intl" support) the string is transliterated via ICU.
     * 4. Every remaining non-ASCII character is decoded to its code point and looked up in the
     *    transliteration tables ("x<bank>" data files, cached per bank for the lifetime of the
     *    process); characters without a mapping are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // cache of the transliteration tables, indexed by bank (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if ($strict === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
                /** @noinspection PhpComposerExtensionStubsInspection */
                $strTmp = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

                // "transliterator_transliterate()" returns false on failure:
                // keep the cleaned string in that case and continue with the table lookup
                if ($strTmp !== false) {
                    $str = $strTmp;

                    // check again, if we only have ASCII, now ...
                    if (self::is_ascii($str) === true) {
                        return $str;
                    }
                }
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            // reset for every character, otherwise a character that cannot be decoded
            // would silently reuse the code point of the previous one
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // decode the code point from the lead byte and its continuation bytes
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // 0xFE and 0xFF are no lead bytes
            if ($ordC0 === 254 || $ordC0 === 255) {
                $c = $unknown;

                continue;
            }

            // no code point could be decoded for this character
            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // the tables are split into banks of 256 code points; load the bank on first use
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {

                // keep for debugging
                /*
                echo "file: " . sprintf('x%02x', $bank) . "\n";
                echo "char: " . $c . "\n";
                echo "ord: " . $ord . "\n";
                echo "newchar: " . $newchar . "\n";
                echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
                echo "bank:" . $bank . "\n\n";
                 */

                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {

                // keep for debugging missing chars
                /*
                echo "file: " . sprintf('x%02x', $bank) . "\n";
                echo "char: " . $c . "\n";
                echo "ord: " . $ord . "\n";
                echo "newchar: " . $newchar . "\n";
                echo "bank:" . $bank . "\n\n";
                 */

                $c = $unknown;
            }
        }
        // break the reference left behind by the by-reference loop
        unset($c);

        return \implode('', $chars);
    }
10033 19
10034
    /**
     * Interpret a value as boolean.
     *
     * The value is cast to string first. The empty string is false, the keywords
     * "true", "1", "on", "yes" / "false", "0", "off", "no" (case-insensitive) map to their
     * boolean meaning, numeric strings are true if they are greater than zero, and everything
     * else is true if something is left after "UTF8::trim()".
     *
     * @param mixed $str <p>The value to interpret; it must be castable to string.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keyword = \strtolower($value);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        return (bool) self::trim($value);
    }
10073
10074
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Only "a-z", "A-Z", "0-9", ".", "-", whitespace and the fallback char survive; whitespace is
     * then replaced by the fallback char, runs of the fallback char are collapsed and the
     * fallback char is trimmed from both ends of the result.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate [optional] <p>No transliteration, conversion etc. is done by default -
     *                                  unsafe characters are simply removed.</p>
     * @param string $fallback_char     [optional] <p>Replacement for whitespace; may be empty, whitespace is
     *                                  then removed.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                            // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty fallback char would produce the invalid pattern "/[]+/", which makes
        // "preg_replace()" fail for the whole pattern list and the result would be empty.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        if ($fallback_char === '') {
            return $string;
        }

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
10109
10110 7
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept); everything else is
     * cast to string and handed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        // nothing to decode
        if ($input === '') {
            return '';
        }

        return self::utf8_decode($input);
    }
10134
10135
    /**
     * Alias for "UTF8::to_iso8859()": the argument is forwarded unchanged.
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[] whatever "to_iso8859()" returns for the given input
     */
    public static function to_latin1($str)
    {
        return self::to_iso8859($str);
    }
10148
10149
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * The input is scanned byte by byte: a lead byte followed by the expected number of
     * continuation bytes (0x80-0xBF) is copied as it is, every other byte >= 0x80 is passed
     * through "to_utf8_convert_helper()". Afterwards "\uXXXX" escape sequences are decoded and,
     * if requested, html-entities as well.
     *
     * @param string|string[] $str                    <p>Any string or array; arrays are converted element by
     *                                                element (recursively).</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string; an empty string if decoding the unicode escape
     *                         sequences fails
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        if (\is_array($str) === true) {
            // convert every element in place ($k is not used)
            foreach ($str as $k => &$v) {
                $v = self::to_utf8($v, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // $max is the length in bytes, $buf collects the converted output
        $max = self::strlen_in_byte($str);
        $buf = '';

        for ($i = 0; $i < $max; ++$i) {
            $c1 = $str[$i];

            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8

                    // a missing byte at the end of the string is treated as "\x00", i.e. as "not a continuation byte"
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];

                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2;
                        // skip the continuation byte that was just copied
                        ++$i;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3;
                        $i += 2;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3 . $c4;
                        $i += 3;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } else { // doesn't look like UTF8, but should be converted
                    $buf .= self::to_utf8_convert_helper($c1);
                }
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion (a continuation byte without a lead byte)

                $buf .= self::to_utf8_convert_helper($c1);
            } else { // it doesn't need conversion
                $buf .= $c1;
            }
        }

        // decode unicode escape sequences (a literal backslash + "u" + four hex digits)
        $buf = \preg_replace_callback(
            '/\\\\u([0-9a-f]{4})/i',
            /**
             * @param array $match $match[1] holds the four hex digits
             *
             * @return string
             */
            static function (array $match): string {
                // always fallback via symfony polyfill
                return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
            },
            $buf
        );

        // "preg_replace_callback()" returns null on error
        if ($buf === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decodeHtmlEntityToUtf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
10261
10262
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We could only use the original function if the string and the chars are 7-bit (ASCII) only,
     * but checking for ASCII costs more time than we would save here.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars [optional] <p>Optional characters to be stripped. If omitted (or empty), characters
     *                      of the unicode categories "Z" (separators) and "C" (control etc.) are stripped.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        //
        // The string "0" is falsy in PHP but a perfectly valid character list, so it must not
        // be mistaken for "no chars given".
        if ($chars === \INF || (!$chars && $chars !== '0')) {
            $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
        } else {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
10291
10292
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the remainder; only the first character
     * is passed through "UTF8::strtoupper()", the remainder is appended untouched.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        if ($cleanUtf8 === true) {
            // invalid byte sequences would shift the character positions used below
            $str = self::clean($str);
        }

        $firstChar = (string) self::substr($str, 0, 1, $encoding);
        $remainder = (string) self::substr($str, 1, null, $encoding);

        $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $firstCharUpper . $remainder;
    }
10323
10324
    /**
     * Alias for "UTF8::ucfirst()".
     *
     * NOTE: Despite its name this method only delegates to "ucfirst()", i.e. it works on the first
     * character of the whole string; use "UTF8::ucwords()" to uppercase every word. The "$lang" and
     * "$tryToKeepStringLength" arguments of "ucfirst()" are not forwarded and keep their defaults.
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        return self::ucfirst($str, $encoding, $cleanUtf8);
    }
10339
10340
    /**
     * Uppercase the first character of all words in the string.
     *
     * In contrast to "mb_convert_case($str, MB_CASE_TITLE)" the other characters of a word are
     * left untouched.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words (compared case-sensitively).</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // strict comparison: a loose "!$str" check would also swallow the valid input "0"
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // remove invalid byte sequences before the string is split into words
            $str = self::clean($str);
        }

        // without a charlist and without exceptions the native function is enough for ASCII input
        $usePhpDefaultFunctions = !(bool) ($charlist . \implode('', $exceptions));
        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // skip empty parts; "0" is skipped as well, which is harmless as it has no uppercase form
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference left behind by the by-reference loop
        unset($word);

        return \implode('', $words);
    }
10396
10397
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * One decoding pass consists of "to_utf8()", "html_entity_decode()", "\urldecode()" and
     * "fix_simple_utf8()". With $multi_decode the pass is repeated until the string no longer
     * changes, otherwise it is applied exactly once.
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode [optional] <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // non-standard "%uXXXX" escapes are turned into numeric html-entities,
        // which are then resolved by the entity decoding below
        $pattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always run at least one decoding pass: previously nothing at all was decoded when
        // $multi_decode was false.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
10449
10450
    /**
     * Return an array with "urlencoded"-win1252 -> UTF-8
     *
     * Keys are the percent-escapes "%20" to "%FF", values are the matching characters.
     *
     * NOTE(review): the entries for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD are
     * empty strings here; confirm against the repository that no invisible
     * characters (DEL, NBSP, soft hyphen, C1 controls) were intended.
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[]
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => '',
            // 0x80 is the euro sign in Windows-1252 (was a duplicate of "%60")
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
10686
10687
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Works byte-wise on the string: 2-byte sequences are folded into a single
     * byte when their code point is below 256, everything else that needs more
     * than one byte is replaced by "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the string as it was right before the byte-wise conversion
     *                              is returned whenever the converted result is not shorter (in characters).
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($str === '') {
            return '';
        }

        // build the search / replace lists only once per request
        if ($SEARCH_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $SEARCH_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $REPLACE_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        /** @noinspection PhpInternalEntityUsedInspection */
        $str = \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // keep a copy, it is needed for the "$keepUtf8Chars" comparison
        $beforeConversion = $str;
        $byteLength = self::strlen_in_byte($str);

        // lookup tables from "/data/ord.php" and "/data/chr.php"
        // (presumably byte => int and int => byte; verify against the data files)
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $replacement = '?';
        $readPos = 0;
        $writePos = 0;

        // The string is rewritten in place; $writePos never overtakes $readPos.
        while ($readPos < $byteLength) {
            switch ($str[$readPos] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // lead byte of a 2-byte sequence: combine it with the next byte
                    $codePoint = (self::$ORD[$str[$readPos] & "\x1F"] << 6) | self::$ORD[$str[++$readPos] & "\x3F"];
                    $str[$writePos] = $codePoint < 256 ? self::$CHR[$codePoint] : $replacement;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // lead byte of a 4-byte sequence: skip one byte more than for 3 bytes
                    ++$readPos;

                // no break

                case "\xE0":
                    // sequences of 3 (or 4) bytes can not be expressed in one byte
                    $str[$writePos] = $replacement;
                    $readPos += 2;

                    break;

                default:
                    $str[$writePos] = $str[$readPos];
            }

            ++$readPos;
            ++$writePos;
        }

        // cut off the leftover bytes behind the last written position
        $decoded = self::substr_in_byte($str, 0, $writePos);
        if ($decoded === false) {
            $decoded = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($decoded) >= (int) self::strlen($beforeConversion)
        ) {
            return $beforeConversion;
        }

        return $decoded;
    }
10775
10776 14
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * After the native conversion, results containing the byte 0xC2 are
     * additionally passed through the "win1252_to_utf8" replacement table.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        // without the byte 0xC2 there is nothing for the table below to replace
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        // build the search / replace lists only once per request
        if ($SEARCH_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $SEARCH_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $REPLACE_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $encoded);
    }
10816
10817
    /**
     * Alias of "UTF8::fix_simple_utf8()", only kept for backward compatibility.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
10830
10831
    /**
     * Returns the table of utf8 whitespace characters stored in this class.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E.
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
10846
10847
    /**
     * Cut a string after a maximum number of words.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
     * @param string $strAddOn <p>Appended to the result if the string was shortened.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // match optional leading whitespace followed by at most $limit words
        \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

        // only a match that is shorter than the input means that words were cut off
        if (
            isset($matches[0])
            &&
            self::strlen($str) !== (int) self::strlen($matches[0])
        ) {
            return self::rtrim($matches[0]) . $strAddOn;
        }

        return $str;
    }
10878
10879
    /**
     * Wraps a string to a given number of characters.
     *
     * The wrap positions are computed by the native "wordwrap()" on a
     * single-byte stand-in of the string, so every character counts as one column.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column.</p>
     */
    public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
    {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $mask mirrors $chars one byte per entry:
        // "#" for an existing break, " " for a space and "?" for anything else
        $mask = '';
        $chars = [];

        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                $chars[] = $break;
                $mask .= '#';
            }

            unset($segments[$segmentIndex]);

            foreach (self::split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        // let the native function decide where the breaks ("#") belong
        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $charIndex = 0;
        $maskIndex = -1;
        $breakPos = -1;

        while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
            // copy all characters in front of the current break
            for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex++]);
            }

            // the break replaces an original break or the space it was placed on
            if ($break === $chars[$charIndex] || $chars[$charIndex] === ' ') {
                unset($chars[$charIndex++]);
            }

            $wrapped .= $break;
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
10944
10945
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split on "\r\n", "\r" and "\n"; every line is wrapped on its
     * own via "UTF8::wordwrap()" (so multi-byte characters count as one column)
     * and is terminated with "\n".
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width that is used for every single line.</p>
     *
     * @return string
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            // the native "wordwrap()" counts bytes and would wrap
            // multi-byte text too early, so use the UTF-8 aware variant
            $wrapped .= self::wordwrap($line, $limit) . "\n";
        }

        return $wrapped;
    }
10969
10970
    /**
     * Returns the list of Unicode White Space characters stored in this class.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
10979
10980
    /**
     * Pads the given string on the left and / or on the right side.
     * The default character used is a space.
     *
     * The target length and the pad type are derived from $left and $right,
     * the padding itself is done by "UTF8::str_pad()".
     *
     * @param string $str
     * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
     * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
     * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        $currentLength = (int) self::strlen($str, $encoding);

        if ($left && !$right) {
            $padType = \STR_PAD_LEFT;
            $targetLength = $left + $currentLength;
        } elseif ($right && !$left) {
            $padType = \STR_PAD_RIGHT;
            $targetLength = $right + $currentLength;
        } else {
            // both sides requested, or no padding at all
            $padType = \STR_PAD_BOTH;
            $targetLength = ($left + $right) + $currentLength;
        }

        return self::str_pad($str, $targetLength, $padStr, $padType, $encoding);
    }
11012
11013 54
    /**
     * Replaces characters via the case-folding tables of this class.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        static $FULL_CASE_FOLD = null;

        $toLower = $useLower === true;

        // common cases: swap the direction of the replacement for lowercase
        if ($toLower) {
            $search = self::$COMMON_CASE_FOLD['upper'];
            $replace = self::$COMMON_CASE_FOLD['lower'];
        } else {
            $search = self::$COMMON_CASE_FOLD['lower'];
            $replace = self::$COMMON_CASE_FOLD['upper'];
        }

        $str = (string) \str_replace($search, $replace, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // the full table is loaded lazily from "/data/caseFolding_full.php"
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        $searchIndex = $toLower ? 0 : 1;
        $replaceIndex = $toLower ? 1 : 0;

        return (string) \str_replace($FULL_CASE_FOLD[$searchIndex], $FULL_CASE_FOLD[$replaceIndex], $str);
    }
11054
11055
    /**
     * Includes "/data/<file>.php" (relative to this file) and returns its result.
     *
     * @param string $file <p>File name without directory and without ".php".</p>
     *
     * @return mixed
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
11068
11069 9
    /**
     * Includes "/data/<file>.php" (relative to this file), but only if that file exists.
     *
     * @param string $file <p>File name without directory and without ".php".</p>
     *
     * @return false|mixed will return false on error
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        return include $path;
    }
11086
11087
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing that could be overloaded
        /** @noinspection PhpComposerExtensionStubsInspection */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($overloadFlags & \MB_OVERLOAD_STRING);
    }
11104
11105
    /**
     * Filters a list of strings; the result is re-indexed.
     *
     * @param array $strings
     * @param bool  $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
     * @param int   $removeShortValues <p>Drop values with this length or shorter, null disables the check.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $filtered = [];

        foreach ($strings as $value) {
            $isTooShort = $removeShortValues !== null
                          &&
                          self::strlen($value) <= $removeShortValues;
            if ($isTooShort) {
                continue;
            }

            $isBlank = $removeEmptyValues === true
                       &&
                       \trim($value) === '';
            if ($isBlank) {
                continue;
            }

            $filtered[] = $value;
        }

        return $filtered;
    }
11139
11140
    /**
     * Builds a regular-expression fragment that matches any of the given characters.
     *
     * Characters are collected into one bracket expression; values that can not
     * be part of it are added as alternatives: "(?:[...]|alternative)".
     * Results are cached per request.
     *
     * @param string $s     <p>The characters that should be matched.</p>
     * @param string $class <p>Initial content of the bracket expression.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASSS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASSS_CACHE[$cacheKey])) {
            return $RX_CLASSS_CACHE[$cacheKey];
        }

        // index 0 holds the bracket expression, further entries are alternatives
        $parts = [$class];

        // iterate by value: the result of a function call is no valid
        // reference target, and the parameter $s must not be overwritten
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading "-" is a literal inside of a bracket expression
                $parts[0] = '-' . $parts[0];
            } elseif (!isset($char[2])) {
                // at most two bytes
                $parts[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $parts[0] .= $char;
            } else {
                $parts[] = $char;
            }
        }

        // compare with the empty string, so that a class which
        // only consists of "0" gets its brackets, too
        if ($parts[0] !== '') {
            $parts[0] = '[' . $parts[0] . ']';
        }

        if (\count($parts) === 1) {
            $return = $parts[0];
        } else {
            $return = '(?:' . \implode('|', $parts) . ')';
        }

        $RX_CLASSS_CACHE[$cacheKey] = $return;

        return $return;
    }
11188
11189
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split at $delimiter and each part is passed to
     * self::str_upper_first(), except for:
     * - parts that are exactly one of the special names (e.g. "von", "de"),
     * - parts that start with one of the special prefixes (e.g. "mc", "d'"),
     * - parts that start with one of the special names, if the delimiter is "-".
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  <p>The encoding used for the prefix checks.</p>
     *
     * @return string <p>The processed names, or an empty string if $delimiter is empty.</p>
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // explode() cannot split on an empty separator (warning + false on PHP 7,
        // ValueError on PHP 8), so keep the historic "empty result" explicitly
        if ($delimiter === '') {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // beginnings that prevent the capitalization of a name part; the special
        // names only count as beginnings for hyphenated names
        $protectedBeginnings = $delimiter === '-'
            ? \array_merge($specialCases['names'], $specialCases['prefixes'])
            : $specialCases['prefixes'];

        $namesArray = \explode($delimiter, $names);

        foreach ($namesArray as &$name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $isProtected = false;
            foreach ($protectedBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $isProtected = true;

                    break;
                }
            }

            if ($isProtected === false) {
                $name = self::str_upper_first($name);
            }
        }
        // drop the reference, so that later writes to $name can not change the array
        unset($name);

        return \implode($delimiter, $namesArray);
    }
11279
11280
    /**
     * Remove combining marks from a string, e.g. for accent-insensitive comparisons.
     *
     * The string is decomposed (Unicode normalization form D) and every run of
     * non-spacing marks (\p{Mn}) is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>The folded string, an empty string if the input can not be normalized,
     *                     or null if preg_replace() fails.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() signals an error with false, which must not be passed on to
        // preg_replace() under strict_types; fall back to an empty string instead
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
11292 30
11293
    /**
     * Convert a single byte into a UTF-8 byte sequence.
     *
     * A byte whose ordinal is listed in the "win1252_to_utf8" table is replaced
     * by the mapped value. Every other byte is encoded as a two-byte sequence:
     * lead byte = 0xC0 | (ordinal / 64), trail byte = 0x80 | (byte & 0x3F).
     *
     * @param int|string $input <p>The byte to convert; it is used as key of the "ord" lookup table.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // load the lookup tables on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Truncate explicitly: using a float with a fractional part as array key
        // is deprecated since PHP 8.1.
        // The bitwise operators are intended: on strings they work byte by byte.
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
11326
}
11327