Passed
Push — master ( 8e918b...ec4c7c )
by Lars
03:22
created

UTF8::normalizeEncoding()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 2
dl 0
loc 3
ccs 1
cts 1
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Bom => Byte-Length
     *
     * Maps every recognised byte-order-mark byte sequence to its length in bytes.
     * The 'as "WINDOWS-1252"' variants are the same marks written as the accented
     * characters they turn into when the raw BOM bytes are misread as WINDOWS-1252;
     * each of those characters occupies two bytes here, hence the larger lengths.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // "ï»¿" written as byte escapes, so the key cannot silently degrade to an empty string
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $UTF8_MSWORD;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $BROKEN_UTF8_FIX;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $WIN1252_TO_UTF8;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $CHR;
219
220
    /**
     * Creates an instance and makes sure the environment-support table
     * has been filled via checkForSupport().
     */
    public function __construct()
    {
        // the check is a no-op once the support table is populated
        self::checkForSupport();
    }
227
228
    /**
     * Fetch the single character found at a given position ($str[1]-like access).
     *
     * @param string $str <p>A UTF-8 string.</p>
     * @param int    $pos <p>Zero-based position of the wanted character.</p>
     *
     * @return string the multi-byte character, or an empty string for an empty
     *                input or a negative position
     */
    public static function access(string $str, int $pos): string
    {
        // nothing can be picked from an empty string or a negative position
        if ($str === '' || $pos < 0) {
            return '';
        }

        $char = self::substr($str, $pos, 1);

        return (string) $char;
    }
248
249
    /**
     * Make sure the string starts with a byte order mark.
     *
     * INFO: A string that already carries a BOM is returned untouched.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string, prefixed with the UTF-8 BOM if it had none
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
266
267
    /**
     * Changes the case of all keys in an array.
     *
     * Keys that become identical after case conversion overwrite each other;
     * the element that comes last in $array wins.
     *
     * @param array $array <p>The array to work on</p>
     * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                     or <strong>CASE_LOWER</strong> (default). Any other value
     *                     is treated like CASE_LOWER.</p>
     *
     * @return array the input values, keyed by the lower- or uppercased keys
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER): array
    {
        // unknown flags fall back to lower-casing
        if ($case !== \CASE_LOWER && $case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: the values are only copied, so a reference is not
        // needed and would be left dangling after the loop
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower($key)
                : self::strtoupper($key);

            $return[$key] = $value;
        }

        return $return;
    }
297
298
    /**
     * Extract the text enclosed by two delimiters.
     *
     * The search for $start begins at $offset; $end is searched after the end
     * of the matched $start. An empty string is returned when either delimiter
     * is missing or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
    {
        $startIndex = self::strpos($str, $start, $offset, $encoding);
        if ($startIndex === false) {
            return '';
        }

        // first character after the start delimiter
        $contentIndex = $startIndex + (int) self::strlen($start, $encoding);

        $endIndex = self::strpos($str, $end, $contentIndex, $encoding);
        if ($endIndex === false) {
            return '';
        }

        // the end delimiter follows the start delimiter immediately
        if ($endIndex === $contentIndex) {
            return '';
        }

        $contentLength = $endIndex - $contentIndex;

        return (string) self::substr($str, $contentIndex, $contentLength, $encoding);
    }
330
331
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
     *
     * The digits are read as one binary number, converted to hexadecimal and
     * packed into bytes. Leading zero bits carry no value and are therefore
     * not preserved; a value of zero yields an empty string.
     *
     * NOTE: the conversion relies on base_convert(), which may lose precision
     * for very large numbers (see the PHP manual).
     *
     * @param mixed $bin 1|0
     *
     * @return string the packed bytes, or an empty string for empty / zero input
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $hex = \base_convert($bin, 2, 16);
        if ($hex === '0') {
            return '';
        }

        // pack('H*') consumes two hex digits per byte and pads a dangling digit
        // on the right, which would shift every nibble; pad on the left instead
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
351
352
    /**
     * Provides the byte order mark of UTF-8.
     *
     * INFO: take a look at UTF8::$bom for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string the three BOM bytes EF BB BF
     */
    public static function bom(): string
    {
        $utf8Bom = "\xEF\xBB\xBF";

        return $utf8Bom;
    }
363
364
    /**
     * Apply a callback to every character of a string.
     *
     * @alias of UTF8::chr_map()
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
378
379
    /**
     * Pick one character from a string by its zero-based index.
     *
     * @param string $str
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // a one-character slice starting at $index
        $char = self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
392
393
    /**
     * Break a string up into its individual characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        // a chunk length of one gives one array entry per character
        $chunkLength = 1;

        return self::str_split($str, $chunkLength);
    }
404
405
    /**
     * Detects which string-related PHP features are available and records
     * the result in the internal support table.
     *
     * INFO: You don't need to run it manually, it will be triggered if it's needed.
     *       Repeated calls return immediately once the table is populated.
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        // mark first, so the detection below runs only once
        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // the transliterator id list is only available with "intl"
        $canListTransliterators = self::$SUPPORT['intl'] === true
                                  &&
                                  \function_exists('transliterator_list_ids') === true;
        /** @noinspection PhpComposerExtensionStubsInspection */
        self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
            ? \transliterator_list_ids()
            : [];

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    }
453
454
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoised per code point and encoding. Code points up to 127
     * are served from the internal character table, everything else via
     * IntlChar (if available) or a manual UTF-8 byte composition.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.
     *                               Numeric strings are treated like their integer value.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                $chrTmp = self::getData('chr');
                if ($chrTmp) {
                    self::$CHR = (array) $chrTmp;
                }
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        if (self::$SUPPORT['intlChar'] === true) {
            // IntlChar::chr() reads a string argument as a character, not as a
            // number, so numeric strings must be cast to keep this path in line
            // with the manual composition below
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr(\is_numeric($code_point) ? (int) $code_point : $code_point);

            // IntlChar::chr() yields null on failure; there is nothing to re-encode then
            if ($chr !== null && $encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        if (self::$CHR === null) {
            $chrTmp = self::getData('chr');
            if ($chrTmp) {
                self::$CHR = (array) $chrTmp;
            }
        }

        // manual composition: spread the code point bits over 1 to 4 bytes
        $code_point = (int) $code_point;
        if ($code_point <= 0x7F) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif ($code_point <= 0x7FF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
568
569
    /**
     * Runs a callback over every character of a string.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        // one array entry per character, each handed to the callback
        return \array_map($callback, self::split($str));
    }
583
584
    /**
     * Lists how many bytes each character of a Unicode string occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $characters = self::split($str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // with active mbstring function overloading the sizes are taken via
        // self::strlen_in_byte() instead of the native strlen()
        $byteCounter = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                return self::strlen_in_byte($data);
            }
            : '\strlen';

        return \array_map($byteCounter, $characters);
    }
619
620
    /**
     * Decode the code point (as decimal number) of a UTF-8 encoded character.
     *
     * The lead byte decides how many bytes belong to the character; the
     * payload bits of the following bytes are appended six at a time.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $codePoint = self::ord($char[0]);

        // 0xxxxxxx: the byte value already is the code point
        if (($codePoint & 0x80) === 0) {
            return $codePoint;
        }

        // derive the sequence length from the lead byte and drop its marker bits
        $length = 1;
        if (($codePoint & 0xE0) === 0xC0) {
            // 110xxxxx
            $length = 2;
            $codePoint &= ~0xC0;
        } elseif (($codePoint & 0xF0) === 0xE0) {
            // 1110xxxx
            $length = 3;
            $codePoint &= ~0xE0;
        } elseif (($codePoint & 0xF8) === 0xF0) {
            // 11110xxx
            $length = 4;
            $codePoint &= ~0xF0;
        }

        // 10xxxxxx: fold in the remaining bytes
        $index = 1;
        while ($index < $length) {
            $codePoint = ($codePoint << 6) + (self::ord($char[$index]) & ~0x80);
            ++$index;
        }

        return $codePoint;
    }
658
659
    /**
     * Format the code point of a UTF-8 encoded character as hexadecimal (U+xxxx).
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx, or an empty string for empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity of the NUL byte is handled like an empty character
        $subject = $char === '&#0;' ? '' : (string) $char;

        $codePoint = self::ord($subject);

        return self::int_to_hex($codePoint, $pfix);
    }
679
680
    /**
     * Get the decimal code point of a character.
     *
     * alias for "UTF8::chr_to_decimal()"
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
693
694
    /**
     * Cuts a string into chunks of a maximum character length and joins the
     * chunks with the given line ending.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) placed between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::split($body, $chunklen);

        return \implode($end, $chunks);
    }
707
708
    /**
     * Strips every byte that is not part of a UTF-8 sequence from a string and
     * optionally applies further normalisation steps.
     *
     * The optional steps run in this order: diamond question mark removal,
     * invisible character removal, whitespace normalisation, MS Word
     * normalisation, BOM removal.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 captures runs of well-formed sequences, the other two groups
        // match stray bytes; replacing with '$1' keeps only the well-formed runs
        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
775
776
    /**
     * Repairs simple UTF-8 encoding errors and then reduces the string to
     * clean, printable UTF-8.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // options for the sanitizing pass
        $removeBom = true;
        $normalizeWhitespace = true;
        $normalizeMsWord = false;
        $keepNonBreakingSpace = true;
        $replaceDiamondQuestionMark = true;
        $removeInvisibleCharacters = true;

        // remove all none UTF-8 symbols
        // && remove diamond question mark (�)
        // && remove invisible characters (e.g. "\0")
        // && remove BOM
        // && normalize whitespace chars (but keep non-breaking-spaces)
        return self::clean(
            $fixed,
            $removeBom,
            $normalizeWhitespace,
            $normalizeMsWord,
            $keepNonBreakingSpace,
            $replaceDiamondQuestionMark,
            $removeInvisibleCharacters
        );
    }
810
811
    /**
     * Translates a string, or an array of strings, into Unicode code points.
     *
     * A string is split into characters first; an array is used as given and
     * each entry is converted via UTF8::ord().
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $characters = \is_string($arg) === true ? self::split($arg) : $arg;

        $codePoints = \array_map([self::class, 'ord'], $characters);
        if (\count($codePoints) === 0) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        // U+xxxx notation requested
        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
855
856
    /**
     * Squeezes every run of whitespace into one single space and trims the
     * result. Tabs, newlines and multibyte whitespace (e.g. thin space,
     * ideographic space) are covered as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

        return self::trim($condensed);
    }
871
872
    /**
     * Counts how often each character occurs in a string.
     *
     * @param string $str       <p>The input string.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(string $str, bool $cleanUtf8 = false): array
    {
        // one entry per character, then tally identical entries
        $characters = self::split($str, 1, $cleanUtf8);

        return \array_count_values($characters);
    }
885
886
    /**
     * Strips "@media" blocks (including their nested rules) from a CSS string.
     *
     * @param string $str
     *
     * @return string the CSS without the matched media-queries
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // ungreedy (U), case-insensitive (i), dot matches newlines (s)
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
901
902
    /**
     * Reports whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
912
913
    /**
     * Converts a decimal code point into its UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * decoded by self::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point; it is concatenated into the entity as-is.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';
        $flags = \ENT_QUOTES | \ENT_HTML5;

        return self::html_entity_decode($entity, $flags);
    }
924
925
    /**
     * Decodes a MIME header field.
     *
     * Uses iconv_mime_decode() when iconv support was detected; otherwise the
     * value is passed to mb_decode_mimeheader() (after a self::encode() call if
     * the target charset is not UTF-8).
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, $encoding = 'UTF-8')
    {
        $isCanonical = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCanonical) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['iconv'] !== true) {
            // no iconv: fall back to the mbstring based decoder
            if ($encoding !== 'UTF-8') {
                $str = self::encode($encoding, $str);
            }

            return \mb_decode_mimeheader($str);
        }

        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
955
956
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  Unlike "UTF8::utf8_encode()", this function is intended to be safe to call on a string that is
     *        already UTF-8 (it tries to fix broken / double encoding instead of encoding twice).
     *
     * Besides real charsets the pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES" are handled for both
     * directions. If the source encoding is one of them, the string is decoded first and the real source
     * encoding is auto-detected afterwards.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>true: detect the current encoding of the string; a
     *                                       detected encoding replaces $fromEncoding, and if nothing can be
     *                                       detected the result of "UTF8::to_utf8()" is returned.<br>
     *                                       false: trust $fromEncoding (detection still runs if it is empty).</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is "JSON" and the string can not be json-encoded
     *
     * @return string <p>The converted string. The input is returned unchanged if it (or $toEncoding) is
     *                empty, if source and target encoding are equal or if no conversion succeeded. An empty
     *                string is returned if $fromEncoding is "BASE64" and the input is not valid Base64.</p>
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
    {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($toEncoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($fromEncoding === 'JSON') {
            // NOTE(review): presumably mirrors \json_decode(), which can yield a
            // non-string value for non-string JSON documents - confirm callers
            // only pass JSON encoded strings here.
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                // Strict decoding rejects invalid Base64 with false; letting that
                // value travel on would end in a TypeError on the string return
                // type, so report "nothing decodable" as an empty string.
                return '';
            }

            $str = $decoded;
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        $fromEncodingDetected = false;
        if (
            $autodetectFromEncoding === true
            ||
            !$fromEncoding
        ) {
            $fromEncodingDetected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

        if ($fromEncodingDetected !== false) {
            // a detected encoding always wins over the one passed in
            $fromEncoding = $fromEncodingDetected;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$fromEncoding
            ||
            $fromEncoding === $toEncoding
        ) {
            return $str;
        }

        if (
            $toEncoding === 'UTF-8'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $toEncoding !== 'UTF-8'
            &&
            $toEncoding !== 'ISO-8859-1'
            &&
            $toEncoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $strEncoded = \mb_convert_encoding(
                $str,
                $toEncoding,
                $fromEncoding
            );

            // a falsy result falls through to the iconv attempt below
            if ($strEncoded) {
                return $strEncoded;
            }
        }

        $return = \iconv($fromEncoding, $toEncoding, $str);
        if ($return !== false) {
            return $return;
        }

        // nothing worked: hand back the (possibly pre-decoded) input
        return $str;
    }
1112
1113
    /**
     * Encodes a string as a MIME header field value via iconv_mime_encode().
     *
     * The field name passed to iconv_mime_encode() is an empty string.
     *
     * @param string $str
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        $fromIsCanonical = $fromCharset === 'UTF-8' || $fromCharset === 'CP850';
        if (!$fromIsCanonical) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        $toIsCanonical = $toCharset === 'UTF-8' || $toCharset === 'CP850';
        if (!$toIsCanonical) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1153
1154
    /**
     * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
     *
     * Cut positions are looked up at the next / previous " " or "." character. Both lookups are combined with
     * min() / max(), so a character that is not found (false) takes part in that comparison as well.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string. An empty string means: extract from the
     *                                         start of the text.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // Compare against '' explicitly: empty() would also swallow the valid
        // search term "0".
        if ($search === '') {
            if ($length > 0) {
                $stringLength = (int) self::strlen($str, $encoding);
                $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = (int) \min(
                self::strpos($str, ' ', $end, $encoding),
                self::strpos($str, '.', $end, $encoding)
            );

            if ($pos) {
                $strSub = self::substr($str, 0, $pos, $encoding);
                if ($strSub === false) {
                    return '';
                }

                return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
            }

            return $str;
        }

        $wordPos = (int) self::stripos($str, $search, 0, $encoding);
        $halfSide = (int) ($wordPos - $length / 2 + (int) self::strlen($search, $encoding) / 2);

        // start of the extract: last " " or "." in front of the centered window
        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = self::substr($str, 0, $halfSide, $encoding);
            if ($halfText !== false) {
                $pos_start = (int) \max(
                    self::strrpos($halfText, ' ', 0, $encoding),
                    self::strrpos($halfText, '.', 0, $encoding)
                );
            }
        }

        if ($wordPos && $halfSide > 0) {
            $offset = $pos_start + $length - 1;
            $realLength = (int) self::strlen($str, $encoding);

            if ($offset > $realLength) {
                $offset = $realLength;
            }

            $pos_end = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                ) - $pos_start;

            if ($pos_end <= 0) {
                // no usable end position: take everything up to the end of the
                // text, measured in the same encoding substr() works with
                $strSub = self::substr($str, $pos_start, $realLength, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
                } else {
                    $extract = '';
                }
            } else {
                $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            }
        } else {
            // search term not found (or found too close to the start): extract
            // from the beginning of the text
            $offset = $length - 1;
            $trueLength = (int) self::strlen($str, $encoding);

            if ($offset > $trueLength) {
                $offset = $trueLength;
            }

            $pos_end = \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );

            if ($pos_end) {
                $strSub = self::substr($str, 0, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1271
1272
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. The name is passed through
     *                                        filter_var(FILTER_SANITIZE_STRING) before it is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to file_get_contents(): search the include
     *                                        path for the file.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is set, a
     *                                        context with that http-timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that is
     *                                        detected as binary (and is neither UTF-16 nor UTF-32) will not be
     *                                        converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // init
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - a
        // replacement will be needed eventually.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            // filter_var() signals a failed filter with false; passing that on
            // would raise a TypeError under strict_types instead of reporting
            // the documented "false on failure".
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // only hand a length to the native function when one was requested
        if (\is_int($maxLength) === true) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 === true) {
            // binary data that is neither UTF-16 nor UTF-32 is left untouched
            $isPlainBinary = self::is_binary($data, true) === true
                             &&
                             self::is_utf16($data, false) === false
                             &&
                             self::is_utf32($data, false) === false;

            if (!$isPlainBinary) {
                $data = self::encode('UTF-8', $data, false, $fromEncoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1365
1366
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * The whole file is read with file_get_contents() and handed to
     * self::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1385
1386
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively (their elements / iterable
     * properties are filtered in place), strings are normalized and every other
     * type is returned untouched.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>Normalization form that is passed to \Normalizer.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which starts with a
     *                                   combining mark (\p{Mn}).</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        if ($type === 'array' || $type === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization at all
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        /** @noinspection PhpUndefinedClassInspection */
        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // marker value: "already normalized, string left as it is"
            $normalized = '-';
        } else {
            /** @noinspection PhpUndefinedClassInspection */
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // an unusable normalizer result means the input needs re-encoding first
            $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
                                   &&
                                   isset($normalized[0], $leading_combining[0])
                                   &&
                                   \preg_match('/^\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1452
1453
    /**
     * "filter_input()"-wrapper whose result is passed through self::filter()
     * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              Only forwarded when it was passed explicitly.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(int $type, string $variable_name, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // forward $options only if the caller really supplied it
        $result = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($result);
    }
1491
1492
    /**
     * "filter_input_array()"-wrapper whose result is passed through self::filter()
     * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
     *
     * Gets external variables and optionally filters them.
     *
     * @see  http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" (the filter type), "flags" (flags that apply
     *                          to the filter) and "options" (options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
     *                          input array are filtered by this filter.
     *                          </p>
     *                          <p>
     *                          Only forwarded (together with $add_empty) when it was passed explicitly.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // without an explicit definition the native defaults are used
        $values = \func_num_args() < 2
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($values);
    }
1537
1538
    /**
     * "filter_var()"-wrapper whose result is passed through self::filter()
     * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
     *
     * Filters a variable with a specified filter.
     *
     * @see  http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply. The
     *                        manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags. If filter
     *                        accepts options, flags can be provided in "flags" field of array. For
     *                        the "callback" filter, callable type should be passed. The
     *                        callback must accept one argument, the value to be filtered, and return
     *                        the value after filtering/sanitizing it.
     *                        Only forwarded when it was passed explicitly.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // for filters that accept options, use this format
     *                        $options = array(
     *                        'options' => array(
     *                        'default' => 3, // value to return if the filter fails
     *                        'min_range' => 0
     *                        ),
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                        // for filters that only accept flags, you can pass them directly
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                        // ... or as an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                        // callback filter
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // forward $options only if the caller really supplied it
        $filtered = \func_num_args() < 3
            ? \filter_var($variable, $filter)
            : \filter_var($variable, $filter, $options);

        return self::filter($filtered);
    }
1606
1607
    /**
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables, optionally filters them and passes the result through "UTF8::filter()".
     *
     * @see  http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a
     *                          filter type, or an array optionally specifying the filter,
     *                          flags and options (keys: "filter", "flags", "options").
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all values
     *                          in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set
     */
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // Forward $definition / $add_empty only when the caller supplied at least two arguments;
        // otherwise call the native function with the data alone.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($filtered);
    }
1650
1651
    /**
     * Tells whether the "finfo" class can be used on this server.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1661
1662
    /**
     * Returns the leading $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start; values <= 0 yield an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        return $n > 0
            ? (string) self::substr($str, 0, $n, $encoding)
            : '';
    }
1679
1680
    /**
     * Check that the string does not contain more unicode characters than the given limit.
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the maximum number of chars the string may have
     *
     * @return bool true if the string length is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1692
1693
    /**
     * Try to repair simple broken UTF-8 strings via a search / replace table.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        // The search / replace lists are built once and then reused on every later call.
        static $brokenSequences = null;
        static $fixedSequences = null;

        if ($str === '') {
            return '';
        }

        if ($brokenSequences === null) {
            // Load the mapping lazily from the "utf8_fix" data set.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
            $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($brokenSequences, $fixedSequences, $str);
    }
1726
1727
    /**
     * Repair a double (or multiple) encoded UTF8 string.
     *
     * Arrays are processed recursively, element by element. A string is decoded and
     * re-encoded repeatedly until the result no longer changes.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Keep decoding until a round-trip leaves the string untouched.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1769
1770
    /**
     * Get the text direction of a specific character.
     *
     * If "IntlChar" support is flagged as available its direction class is used first;
     * when that class is neither a known LTR nor a known RTL class (or "IntlChar" is
     * not available) a built-in table of right-to-left code point ranges decides.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction classes from the "IntlChar"-Class
            $ltrClasses = [0, 11, 12, 20];
            $rtlClasses = [1, 13, 14, 15, 21];

            if (\in_array($intlDirection, $ltrClasses, true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, $rtlClasses, true)) {
                return 'RTL';
            }

            // any other class: fall through to the code point table below
        }

        $codePoint = self::chr_to_decimal($char);

        // Everything outside of the overall table bounds is left-to-right.
        if ($codePoint < 0x5be || $codePoint > 0x10b7f) {
            return 'LTR';
        }

        // Inclusive [first, last] ranges of right-to-left code points.
        $rtlRanges = [
            [0x5be, 0x5be],
            [0x5c0, 0x5c0],
            [0x5c3, 0x5c3],
            [0x5c6, 0x5c6],
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x608, 0x608],
            [0x60b, 0x60b],
            [0x60d, 0x60d],
            [0x61b, 0x61b],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x710, 0x710],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7b1, 0x7b1],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x7fa, 0x7fa],
            [0x800, 0x815],
            [0x81a, 0x81a],
            [0x824, 0x824],
            [0x828, 0x828],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0x85e, 0x85e],
            [0x200f, 0x200f],
            [0xfb1d, 0xfb1d],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb3e, 0xfb3e],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x10808, 0x10808],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083c, 0x1083c],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x1093f, 0x1093f],
            [0x10a00, 0x10a00],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as list($first, $last)) {
            if ($codePoint >= $first && $codePoint <= $last) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
1887
1888
    /**
     * Report which php-features were detected by the support check.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        // Run the detection lazily on first use.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        return $key === null
            ? self::$SUPPORT
            : (self::$SUPPORT[$key] ?? null);
    }
1914
1915
    /**
     * Guess the file type from the first two bytes of the given (binary) string.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The content to inspect.</p>
     * @param array  $fallback <p>Returned unchanged when no type matches; with these keys: 'ext', 'mime', 'type'</p>
     *
     * @return array
     *               with these keys: 'ext', 'mime', 'type'
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        $signature = self::substr_in_byte($str, 0, 2);
        if ($signature === false || self::strlen_in_byte($signature) !== 2) {
            return $fallback;
        }

        // The raw two-byte signature is compared directly. Concatenating the decimal
        // byte values (as done before) is ambiguous: e.g. "\x07\xAD" => "7" . "173"
        // collides with "GI" => "71" . "73" and was wrongly reported as a gif.
        $knownSignatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xff\xd8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        if (!isset($knownSignatures[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $knownSignatures[$signature][0],
            'mime' => $knownSignatures[$signature][1],
            'type' => 'binary',
        ];
    }
2013
2014
    /**
     * Build a random string by picking characters from $possibleChars.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        $alphabetSize = (int) self::strlen($possibleChars, $encoding);
        if ($alphabetSize === 0) {
            return '';
        }

        $lastIndex = $alphabetSize - 1;
        $result = '';
        $generated = 0;

        while ($generated < $length) {
            try {
                $index = \random_int(0, $lastIndex);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $lastIndex);
            }

            $picked = self::substr($possibleChars, $index, 1, $encoding);
            if ($picked === false) {
                // nothing extracted at this position, draw again
                continue;
            }

            $result .= $picked;
            ++$generated;
        }

        return $result;
    }
2049
2050
    /**
     * Build a unique string from a random number, the session id, the remote / server
     * address (if available) and the given extra entropy.
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // "random_int()" throws if no source of randomness is available; fall back to
        // "mt_rand()" in that case, like "UTF8::get_random_string()" does.
        try {
            $randomPart = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $randomPart = \mt_rand(0, \mt_getrandmax());
        }

        $uniqueHelper = $randomPart .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        $uniqueString = \uniqid($uniqueHelper, true);

        if ($md5) {
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2072
2073
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @see        UTF8::string_has_bom()
     *
     * @param string $str
     *
     * @return bool the result of "UTF8::string_has_bom()" for the given string
     *
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $bomFound = self::string_has_bom($str);

        return $bomFound;
    }
2088
2089
    /**
     * Tells whether the string contains at least one lower case char.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        $lowerCasePattern = '.*[[:lower:]]';

        return self::str_matches_pattern($str, $lowerCasePattern);
    }
2100
2101
    /**
     * Tells whether the string contains at least one upper case char.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        $upperCasePattern = '.*[[:upper:]]';

        return self::str_matches_pattern($str, $upperCasePattern);
    }
2112
2113
    /**
     * Turns a hexadecimal value into the matching UTF-8 character.
     *
     * The value is converted with "hexdec()" and then handed to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2124
2125
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "U+" or "\u"
     * (case-insensitive). Anything else - including non-hexadecimal letters - is rejected.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hex digits are allowed: with "[a-z0-9]" an input like "zzzz" used to
        // match and was then silently converted to 0 by "intval()".
        if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2149
2150
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @see UTF8::html_entity_decode()
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string the result of "UTF8::html_entity_decode()"
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2165
2166
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // With $keepAsciiChars the conversion starts behind the ASCII range.
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // The convmap must consist of groups of exactly four integers
            // (start, end, offset, mask): PHP 8 rejects any other element count.
            return \mb_encode_numericentity(
                $str,
                [$startCode, 0xfffff, 0, 0xfffff],
                $encoding
            );
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::split($str)
            )
        );
    }
2219
2220
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so we are left with our own little solution here. Bummer.
     *
     * Convert all HTML entities to their applicable characters. The decoding is
     * repeated until the string does not change anymore.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string $str      <p>
     *                         The input string.
     *                         </p>
     * @param int    $flags    [optional] <p>
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
     *                         which document type to use. If omitted (<b>NULL</b>), ENT_QUOTES | ENT_HTML5 is used.
     *                         <table>
     *                         Available <i>flags</i> constants
     *                         <tr valign="top">
     *                         <td>Constant Name</td>
     *                         <td>Description</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_COMPAT</b></td>
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_QUOTES</b></td>
     *                         <td>Will convert both double and single quotes.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_NOQUOTES</b></td>
     *                         <td>Will leave both double and single quotes unconverted.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_HTML401</b></td>
     *                         <td>Handle code as HTML 4.01.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_XML1</b></td>
     *                         <td>Handle code as XML 1.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_XHTML</b></td>
     *                         <td>Handle code as XHTML.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_HTML5</b></td>
     *                         <td>Handle code as HTML 5.</td>
     *                         </tr>
     *                         </table>
     *                         </p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if (!isset($str[3])) { // examples: &; || &x;
            return $str;
        }

        if (
            \strpos($str, '&') === false
            ||
            (
                \strpos($str, '&#') === false
                &&
                \strpos($str, ';') === false
            )
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // The support information has to be available before "mbstring" is
        // looked up for the warning below.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                // The convmap must consist of groups of exactly four integers
                // (start, end, offset, mask): PHP 8 rejects any other element count.
                $str = \mb_decode_numericentity(
                    $str,
                    [0x80, 0xfffff, 0, 0xfffff],
                    $encoding
                );
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        // always fallback via symfony polyfill
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                        // quotes are left encoded here, they are handled via $flags below
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            // decode numeric & UTF16 two byte entities
            // (the inner replace appends the missing semicolon to numeric entities first)
            $str = \html_entity_decode(
                (string) \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
                $flags,
                $encoding
            );
        } while ($str_compare !== $str);

        return $str;
    }
2377
2378
    /**
     * Escape the string for html output via "UTF8::htmlspecialchars()",
     * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2394
2395
    /**
     * Remove html-tags which are empty or only contain whitespace.
     *
     * e.g.: <tag></tag>
     *
     * @param string $str
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // opening tag, optional whitespace, closing tag
        $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        return (string) $stripped;
    }
2412
2413
    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
     *
     * On top of PHP's native htmlentities() this method also replaces every backslash
     * with "&#92;" and finally passes the result through UTF8::html_encode().
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str           <p>The input string.</p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how
     *                              to handle quotes, invalid code unit sequences and the used document
     *                              type. This method defaults to ENT_COMPAT.
     *                              <br><br>
     *                              <b>ENT_COMPAT</b>: convert double-quotes, leave single-quotes alone.<br>
     *                              <b>ENT_QUOTES</b>: convert both double and single quotes.<br>
     *                              <b>ENT_NOQUOTES</b>: leave both double and single quotes unconverted.<br>
     *                              <b>ENT_IGNORE</b>: silently discard invalid code unit sequences instead
     *                              of returning an empty string (discouraged, may have security
     *                              implications).<br>
     *                              <b>ENT_SUBSTITUTE</b>: replace invalid code unit sequences with the
     *                              Unicode Replacement Character U+FFFD instead of returning an empty
     *                              string.<br>
     *                              <b>ENT_DISALLOWED</b>: replace code points that are invalid for the
     *                              given document type with U+FFFD instead of leaving them as is.<br>
     *                              <b>ENT_HTML401</b>: handle code as HTML 4.01.<br>
     *                              <b>ENT_XML1</b>: handle code as XML 1.<br>
     *                              <b>ENT_XHTML</b>: handle code as XHTML.<br>
     *                              <b>ENT_HTML5</b>: handle code as HTML 5.
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              The encoding used in the conversion. Anything other than "UTF-8"
     *                              or "CP850" is passed through UTF8::normalize_encoding() first.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When turned off, PHP will not encode existing html entities.
     *                              The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The encoded string.
     *                <br><br>
     *                If the input contains an invalid code unit sequence within the given
     *                encoding, the native conversion yields an empty string, unless either
     *                the <b>ENT_IGNORE</b> or <b>ENT_SUBSTITUTE</b> flag is set.
     *                </p>
     */
    public static function htmlentities(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The native function leaves backslashes untouched (they are usually only
        // relevant for database escaping), so they are turned into their numeric
        // entity in a second step.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        return self::html_encode($encoded, true, $encoding);
    }
2537
2538
    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str           <p>The string being converted.</p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how
     *                              to handle quotes, invalid code unit sequences and the used document
     *                              type. This method defaults to ENT_COMPAT.
     *                              <br><br>
     *                              <b>ENT_COMPAT</b>: convert double-quotes, leave single-quotes alone.<br>
     *                              <b>ENT_QUOTES</b>: convert both double and single quotes.<br>
     *                              <b>ENT_NOQUOTES</b>: leave both double and single quotes unconverted.<br>
     *                              <b>ENT_IGNORE</b>: silently discard invalid code unit sequences instead
     *                              of returning an empty string (discouraged, may have security
     *                              implications).<br>
     *                              <b>ENT_SUBSTITUTE</b>: replace invalid code unit sequences with the
     *                              Unicode Replacement Character U+FFFD instead of returning an empty
     *                              string.<br>
     *                              <b>ENT_DISALLOWED</b>: replace code points that are invalid for the
     *                              given document type with U+FFFD instead of leaving them as is.<br>
     *                              <b>ENT_HTML401</b>: handle code as HTML 4.01.<br>
     *                              <b>ENT_XML1</b>: handle code as XML 1.<br>
     *                              <b>ENT_XHTML</b>: handle code as XHTML.<br>
     *                              <b>ENT_HTML5</b>: handle code as HTML 5.
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Defines the encoding used in the conversion. Anything other than
     *                              "UTF-8" or "CP850" is passed through UTF8::normalize_encoding() first.
     *                              </p>
     *                              <p>
     *                              For the purposes of this function, the encodings ISO-8859-1,
     *                              ISO-8859-15, UTF-8, cp866, cp1251, cp1252, and KOI8-R are
     *                              effectively equivalent, provided the string itself is valid for
     *                              the encoding, as the characters affected by
     *                              <b>htmlspecialchars</b> occupy the same positions in all of these
     *                              encodings.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When turned off, PHP will not encode existing html entities.
     *                              The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input contains an invalid code unit sequence within the given
     *                encoding an empty string will be returned, unless either the
     *                <b>ENT_IGNORE</b> or <b>ENT_SUBSTITUTE</b> flag is set.
     *                </p>
     */
    public static function htmlspecialchars(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        $needsNormalization = ($encoding !== 'UTF-8' && $encoding !== 'CP850');
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2655
2656
    /**
     * Tells whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2666
2667
    /**
     * Alias of "UTF8::decimal_to_chr()": the argument is forwarded unchanged.
     *
     * @see UTF8::decimal_to_chr()
     *
     * @param mixed $int
     *
     * @return string
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2680
2681
    /**
     * Converts an integer to its hexadecimal "U+xxxx" code point representation.
     *
     * The hexadecimal digits are lower case (as produced by dechex()) and are
     * left-padded with zeros to at least four digits.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>The prefix placed in front of the hex digits.</p>
     *
     * @return string the prefix followed by the zero-padded hexadecimal digits
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Values that already have four or more digits pass through unchanged.
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $digits;
    }
2699
2700
    /**
     * Tells whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $isAvailable = \class_exists('IntlChar');

        return $isAvailable;
    }
2710
2711
    /**
     * Tells whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2721
2722
    /**
     * Deprecated camelCase alias of "UTF8::is_ascii()".
     *
     * @see        UTF8::is_ascii()
     *
     * @param string $str
     *
     * @return bool the result of UTF8::is_ascii() for the same input
     *
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $result = self::is_ascii($str);

        return $result;
    }
2737
2738
    /**
     * Deprecated camelCase alias of "UTF8::is_base64()".
     *
     * @see        UTF8::is_base64()
     *
     * @param string $str
     *
     * @return bool the result of UTF8::is_base64() for the same input
     *
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $result = self::is_base64($str);

        return $result;
    }
2753
2754
    /**
     * Deprecated camelCase alias of "UTF8::is_binary()".
     *
     * @see        UTF8::is_binary()
     *
     * @param mixed $str
     * @param bool  $strict
     *
     * @return bool the result of UTF8::is_binary() for the same arguments
     *
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $result = self::is_binary($str, $strict);

        return $result;
    }
2770
2771
    /**
     * Deprecated camelCase alias of "UTF8::is_bom()".
     *
     * @see        UTF8::is_bom()
     *
     * @param string $utf8_chr
     *
     * @return bool the result of UTF8::is_bom() for the same input
     *
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $result = self::is_bom($utf8_chr);

        return $result;
    }
2786
2787
    /**
     * Deprecated camelCase alias of "UTF8::is_html()".
     *
     * @see        UTF8::is_html()
     *
     * @param string $str
     *
     * @return bool the result of UTF8::is_html() for the same input
     *
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $result = self::is_html($str);

        return $result;
    }
2802
2803
    /**
     * Deprecated camelCase alias of "UTF8::is_json()".
     *
     * @see        UTF8::is_json()
     *
     * @param string $str
     *
     * @return bool the result of UTF8::is_json() for the same input
     *
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $result = self::is_json($str);

        return $result;
    }
2818
2819
    /**
     * Deprecated camelCase alias of "UTF8::is_utf16()".
     *
     * @see        UTF8::is_utf16()
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
2837
2838
    /**
     * Deprecated camelCase alias of "UTF8::is_utf32()".
     *
     * @see        UTF8::is_utf32()
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
2856
2857
    /**
     * Deprecated camelCase alias of "UTF8::is_utf8()".
     *
     * @see        UTF8::is_utf8()
     *
     * @param string $str
     * @param bool   $strict
     *
     * @return bool the result of UTF8::is_utf8() for the same arguments
     *
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $result = self::is_utf8($str, $strict);

        return $result;
    }
2873
2874
    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
     *
     * The check is delegated to UTF8::str_matches_pattern() with the
     * POSIX character class "[[:alpha:]]".
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
2886
2887
    /**
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
     *
     * The check is delegated to UTF8::str_matches_pattern() with the
     * POSIX character class "[[:alnum:]]".
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
2899
2900
    /**
     * Checks if a string is 7 bit ASCII.
     *
     * Accepted bytes are tab (0x09), line feed (0x0A), carriage return (0x0D),
     * 0x10, 0x13 and the printable range 0x20-0x7E. Any other byte, including
     * the remaining control characters, makes the check fail.
     *
     * NOTE(review): 0x10 and 0x13 were possibly meant as decimal 10 / 13 - confirm
     * before changing, the accepted set is kept as it is here.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        // The empty string counts as ASCII without consulting the regex.
        return $str === ''
               ||
               !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
2917
2918
    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * The input is decoded in strict mode and encoded again; it only counts as
     * base64 when that round trip reproduces the input exactly.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str is base64 encoded; always <strong>false</strong>
     *              for the empty string and for non-string input
     */
    public static function is_base64($str): bool
    {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_string($str) === false || $str === '') {
            return false;
        }

        $decoded = \base64_decode($str, true);

        // Compare against false explicitly: a plain truthiness check would reject
        // valid input whose decoded payload is the string "0" (e.g. "MA==").
        return $decoded !== false && \base64_encode($decoded) === $str;
    }
2942
2943
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The input is cast to string and then considered binary as soon as one of
     * these checks succeeds, in this order:
     * - it consists only of the characters "0" and "1"
     * - UTF8::get_file_type() reports the type "binary"
     * - more than 25 % of its bytes are NUL bytes
     * - (only with $strict) finfo reports the MIME encoding "binary"
     *
     * The empty string is never binary.
     *
     * @param mixed $input
     * @param bool  $strict <p>Also ask ext-fileinfo for the MIME encoding.</p>
     *
     * @throws \RuntimeException if $strict is used and fileinfo support is flagged as missing
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // a textual bit string such as "010011"
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        $byteLength = self::strlen_in_byte($input);
        if ($byteLength) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            // share of NUL bytes within the whole input
            $nullByteCount = self::substr_count_in_byte($input, "\x0", 0, $byteLength);
            if ($nullByteCount !== false && ($nullByteCount / $byteLength) > 0.25) {
                return true;
            }
        }

        if ($strict !== true) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mimeEncoding === 'binary';
    }
3001
3002
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and handed to
     * UTF8::is_binary() in strict mode. A file that cannot be opened or
     * that yields no data is reported as not binary.
     *
     * @param string $file <p>The path passed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (\is_resource($handle) === false) {
            return false;
        }

        $firstBytes = \fread($handle, 512);
        \fclose($handle);

        if ($firstBytes === '') {
            return false;
        }

        return self::is_binary($firstBytes, true);
    }
3026
3027
    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
     *
     * The check is delegated to UTF8::str_matches_pattern() with the
     * POSIX character class "[[:space:]]".
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3039
3040
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is identical to one of the known
     *              Byte Order Marks, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // The BOM byte sequences are the keys of self::$BOM (the values are their
        // byte lengths). A strict lookup over the keys replaces the former
        // by-reference loop, which needlessly took references into the shared
        // static array just to compare its keys.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3061
3062
    /**
     * Determine whether the given value is considered to be empty.
     *
     * The value is empty if it equals FALSE under PHP's loose boolean
     * conversion, e.g.: '', '0', 0, null, false or [].
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // For an always-defined parameter, empty() is the same as a boolean negation.
        return !$str;
    }
3076
3077
    /**
     * Returns true if the string contains only hexadecimal chars, false otherwise.
     *
     * The check is delegated to UTF8::str_matches_pattern() with the
     * POSIX character class "[[:xdigit:]]".
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3089
3090
    /**
     * Check if the string contains any html-tags, e.g.: "<lall>".
     *
     * Opening, closing and self-closing tags are recognised, with or without
     * attributes. The empty string never contains html.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // One match is enough: the captured text itself is not needed.
        $found = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

        return $found === 1;
    }
3110
3111
    /**
     * Try to check if "$str" is a json-string.
     *
     * Only input that decodes to an object or an array without a JSON error is
     * accepted; the empty string is rejected up front.
     *
     * @param string $str <p>The input string.</p>
     *
     * @throws \RuntimeException if json support is flagged as missing
     *
     * @return bool
     */
    public static function is_json(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);
        if (\is_object($decoded) === false && \is_array($decoded) === false) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3143
3144
    /**
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * The check is delegated to UTF8::str_matches_pattern() with the
     * POSIX character class "[[:lower:]]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3153
3154
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is unserialized to find out; objects inside the payload are
     * never instantiated while doing so, so checking untrusted input does not
     * run any __wakeup() / __destruct() / unserialize() code.
     *
     * @param string $str
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is a serialized false and would otherwise be indistinguishable
        // from the failure value of unserialize().
        if ($str === 'b:0;') {
            return true;
        }

        // The silence operator hides the notice / warning that unserialize()
        // raises for input that is not serialized data.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3173
3174
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * The check is delegated to UTF8::str_matches_pattern() with the
     * POSIX character class "[[:upper:]]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3187
3188
    /**
3189
     * Check if the string is UTF-16.
3190
     *
3191
     * @param mixed $str                   <p>The input string.</p>
3192
     * @param bool  $checkIfStringIsBinary
3193
     *
3194
     * @return false|int
3195
     *                   <strong>false</strong> if is't not UTF-16,<br>
3196
     *                   <strong>1</strong> for UTF-16LE,<br>
3197
     *                   <strong>2</strong> for UTF-16BE
3198
     */
3199 21
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        // Unless the caller opted out, input that is_binary() does not
        // classify as binary is rejected right away.
        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Both byte orders are scored with the same round-trip heuristic, so
        // the logic lives in one loop instead of two copy-pasted sections.
        $scores = [
            'UTF-16LE' => 0,
            'UTF-16BE' => 0,
        ];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$decoded) {
                continue;
            }

            // The decoded text has to survive an encode/decode round trip unchanged.
            $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
            $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
            if ($roundTrip !== $decoded) {
                continue;
            }

            // Computed lazily and shared between both byte orders.
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // Only the keys are needed; iterating them by value avoids taking a
            // reference to the temporary array returned by count_chars().
            // NOTE(review): in_array() searches the *values* of $strChars, not
            // its keys - confirm against the return shape of count_chars() that
            // this is the intended comparison.
            foreach (\array_keys(self::count_chars($roundTrip, true)) as $char) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        // A tie (including "both zero") means the byte order could not be decided.
        if ($scores['UTF-16BE'] !== $scores['UTF-16LE']) {
            // 1 === UTF-16LE, 2 === UTF-16BE
            return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
        }

        return false;
    }
3265
3266
    /**
     * Check if the string is UTF-32.
     *
     * <p>The input (without BOM) is decoded as UTF-32LE and as UTF-32BE; each
     * byte order that survives a decode/encode/decode round trip gets a score
     * and the byte order with the higher score wins.</p>
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If true, input that is not detected as binary
     *                                     is rejected immediately.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32 (or the byte order is undecidable),<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Both byte orders are scored with the same round-trip heuristic, so
        // the logic lives in one loop instead of two copy-pasted sections.
        $scores = [
            'UTF-32LE' => 0,
            'UTF-32BE' => 0,
        ];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$decoded) {
                continue;
            }

            // The decoded text has to survive an encode/decode round trip unchanged.
            $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
            $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
            if ($roundTrip !== $decoded) {
                continue;
            }

            // Computed lazily and shared between both byte orders.
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // Only the keys are needed; iterating them by value avoids taking a
            // reference to the temporary array returned by count_chars().
            // NOTE(review): in_array() searches the *values* of $strChars, not
            // its keys - confirm against the return shape of count_chars() that
            // this is the intended comparison.
            foreach (\array_keys(self::count_chars($roundTrip, true)) as $char) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        // A tie (including "both zero") means the byte order could not be decided.
        if ($scores['UTF-32BE'] !== $scores['UTF-32LE']) {
            return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
        }

        return false;
    }
3343
3344
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * <p>For an array, every element is checked (recursively); the first invalid element ends the check.</p>
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string (or array of strings) to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *              <strong>true</strong> for an empty string, an empty array or valid UTF-8 input,
     *              <strong>false</strong> otherwise
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Nothing is modified here, so iterate by value (no dangling reference).
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Normalise scalar input once, so every check below works on a real string.
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // The /u shortcut is only meaningful when PCRE actually supports UTF-8;
        // without that support the byte-wise scanner below is used instead.
        if (self::pcre_utf8_support() === true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = self::strlen_in_byte($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } else {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if ((0xC0 & $in) === 0x80) {
                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;
                    // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                    // Unicode code point to be output.
                    if (--$mState === 0) {
                        // Check for illegal sequences and code points.
                        //
                        // From Unicode 3.1, non-shortest form is illegal
                        if (
                            ($mBytes === 2 && $mUcs4 < 0x0080)
                            ||
                            ($mBytes === 3 && $mUcs4 < 0x0800)
                            ||
                            ($mBytes === 4 && $mUcs4 < 0x10000)
                            ||
                            ($mBytes > 4)
                            ||
                            // From Unicode 3.2, surrogate characters are illegal.
                            (($mUcs4 & 0xFFFFF800) === 0xD800)
                            ||
                            // Code points outside the Unicode range are illegal.
                            ($mUcs4 > 0x10FFFF)
                        ) {
                            return false;
                        }
                        // initialize UTF8 cache
                        $mState = 0;
                        $mUcs4 = 0;
                        $mBytes = 1;
                    }
                } else {
                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
                    // Incomplete multi-octet sequence.
                    return false;
                }
            }
        }

        // A non-zero state means the input ended in the middle of a multi-octet
        // sequence (e.g. a lone lead byte at the very end), which is invalid.
        return $mState === 0;
    }
3503
3504
    /**
     * Decodes a JSON string after passing it through "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded; it is filtered via
     *                        "UTF8::filter()" before it is handed to the native decoder.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, forwarded unchanged
     *                        to the native "json_decode()".</p>
     *
     * @throws \RuntimeException if the json extension is reported as missing
     *
     * @return mixed
     *               The result of the native "json_decode()" call: the decoded value in the
     *               appropriate PHP type, or <b>NULL</b> if the <i>json</i> cannot be decoded or
     *               if the encoded data is deeper than the recursion limit
     */
    public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
    {
        $filteredJson = self::filter($json);

        // make sure the feature detection has run before reading its result
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $hasJsonExtension = self::$SUPPORT['json'] !== false;
        if (!$hasJsonExtension) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filteredJson, $assoc, $depth, $options);
    }
3554
3555
    /**
     * Returns the JSON representation of a value after passing it through "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded; it is filtered via
     *                       "UTF8::filter()" before it is handed to the native encoder.</p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>; forwarded unchanged to the
     *                       native "json_encode()".
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if the json extension is reported as missing
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        // make sure the feature detection has run before reading its result
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $hasJsonExtension = self::$SUPPORT['json'] !== false;
        if (!$hasJsonExtension) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filteredValue, $options, $depth);
    }
3608
3609
    /**
     * Checks whether JSON is available on the server.
     *
     * <p>The check is done by probing for the native "json_decode()" function.</p>
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $isAvailable = \function_exists('json_decode');

        return $isAvailable;
    }
3619
3620
    /**
     * Makes string's first char lowercase.
     *
     * <p>The string is split into its first character and the remainder; only the
     * first character is lower-cased, then both parts are joined again.</p>
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // everything after the first character stays untouched
        $tail = (string) self::substr($str, 1, null, $encoding, $cleanUtf8);

        $firstChar = (string) self::substr($str, 0, 1, $encoding, $cleanUtf8);
        $head = self::strtolower($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $head . $tail;
    }
3645
3646
    /**
     * alias for "UTF8::lcfirst()"
     *
     * <p>All arguments are forwarded unchanged.</p>
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3668
3669
    /**
     * Lowercase the first character of all words in the string.
     *
     * <p>The string is split via "UTF8::str_to_words()", every non-empty piece that is not
     * listed in $exceptions is passed to "UTF8::lcfirst()" and the pieces are joined again.</p>
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words (strict comparison).</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a loose "!$str" would also swallow the input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // Write back by index instead of by reference, so no reference outlives the loop.
        foreach ($words as $index => $word) {
            // empty pieces (and "0") have nothing to lower-case
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
3715
3716
    /**
     * alias for "UTF8::lcfirst()"
     *
     * <p>All arguments are forwarded unchanged.</p>
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3738
3739
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * <p>Without a character list, leading characters of the Unicode categories
     * Z (separators) and C (other, e.g. control chars) are removed.</p>
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars <p>Optional characters to be stripped; \INF or an empty value
     *                      selects the default, the string "0" is a valid character list.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is falsy in PHP but still a perfectly valid character to strip,
        // so it must not fall back to the default pattern.
        $useDefaultPattern = $chars === \INF || ($chars !== '0' && !$chars);

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultPattern) {
            $pattern = '^[\pZ\pC]+';
        } else {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
            $pattern = "^[{$chars}]+";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3763
3764
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * <p>An array argument is concatenated into a single string first.</p>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null when no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
3786
3787
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 if "UTF8::chr_size_list()" yields nothing
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
3804
3805
    /**
     * Checks whether mbstring is available on the server.
     *
     * <p>Side effect: when the extension is loaded, the mbstring internal
     * encoding is switched to "UTF-8".</p>
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        if (\extension_loaded('mbstring') !== true) {
            return false;
        }

        \mb_internal_encoding('UTF-8');

        return true;
    }
3820
3821
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * <p>An array argument is concatenated into a single string first.</p>
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code points were found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
3843
3844
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * <p>Both arguments are forwarded unchanged.</p>
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
3860
3861
    /**
3862
     * Normalize the encoding-"name" input.
3863
     *
3864
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3865
     * @param mixed $fallback <p>e.g.: UTF-8</p>
3866
     *
3867
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
3868
     */
3869
    public static function normalize_encoding($encoding, $fallback = '')
3870
    {
3871 354
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];
3872
3873
        // init
3874 354
        $encoding = (string) $encoding;
3875
3876
        if (
3877 354
            !$encoding
3878
            ||
3879 50
            $encoding === '1' // only a fallback, for non "strict_types" usage ...
3880
            ||
3881 354
            $encoding === '0' // only a fallback, for non "strict_types" usage ...
3882
        ) {
3883 309
            return $fallback;
3884
        }
3885
3886
        if (
3887 49
            $encoding === 'UTF-8'
3888
            ||
3889 49
            $encoding === 'UTF8'
3890
        ) {
3891 22
            return 'UTF-8';
3892
        }
3893
3894
        if (
3895 42
            $encoding === '8BIT'
3896
            ||
3897 42
            $encoding === 'BINARY'
3898
        ) {
3899
            return 'CP850';
3900
        }
3901
3902
        if (
3903 42
            $encoding === 'HTML'
3904
            ||
3905 42
            $encoding === 'HTML-ENTITIES'
3906
        ) {
3907 2
            return 'HTML-ENTITIES';
3908
        }
3909
3910 42
        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
3911 40
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
3912
        }
3913
3914 6
        if (self::$ENCODINGS === null) {
3915 1
            self::$ENCODINGS = self::getData('encodings');
3916
        }
3917
3918 6
        if (\in_array($encoding, self::$ENCODINGS, true)) {
3919 4
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;
3920
3921 4
            return $encoding;
3922
        }
3923
3924 5
        $encodingOrig = $encoding;
3925 5
        $encoding = \strtoupper($encoding);
3926 5
        $encodingUpperHelper = (string) \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
3927
3928
        $equivalences = [
3929 5
            'ISO8859'     => 'ISO-8859-1',
3930
            'ISO88591'    => 'ISO-8859-1',
3931
            'ISO'         => 'ISO-8859-1',
3932
            'LATIN'       => 'ISO-8859-1',
3933
            'LATIN1'      => 'ISO-8859-1', // Western European
3934
            'ISO88592'    => 'ISO-8859-2',
3935
            'LATIN2'      => 'ISO-8859-2', // Central European
3936
            'ISO88593'    => 'ISO-8859-3',
3937
            'LATIN3'      => 'ISO-8859-3', // Southern European
3938
            'ISO88594'    => 'ISO-8859-4',
3939
            'LATIN4'      => 'ISO-8859-4', // Northern European
3940
            'ISO88595'    => 'ISO-8859-5',
3941
            'ISO88596'    => 'ISO-8859-6', // Greek
3942
            'ISO88597'    => 'ISO-8859-7',
3943
            'ISO88598'    => 'ISO-8859-8', // Hebrew
3944
            'ISO88599'    => 'ISO-8859-9',
3945
            'LATIN5'      => 'ISO-8859-9', // Turkish
3946
            'ISO885911'   => 'ISO-8859-11',
3947
            'TIS620'      => 'ISO-8859-11', // Thai
3948
            'ISO885910'   => 'ISO-8859-10',
3949
            'LATIN6'      => 'ISO-8859-10', // Nordic
3950
            'ISO885913'   => 'ISO-8859-13',
3951
            'LATIN7'      => 'ISO-8859-13', // Baltic
3952
            'ISO885914'   => 'ISO-8859-14',
3953
            'LATIN8'      => 'ISO-8859-14', // Celtic
3954
            'ISO885915'   => 'ISO-8859-15',
3955
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
3956
            'ISO885916'   => 'ISO-8859-16',
3957
            'LATIN10'     => 'ISO-8859-16', // Southeast European
3958
            'CP1250'      => 'WINDOWS-1250',
3959
            'WIN1250'     => 'WINDOWS-1250',
3960
            'WINDOWS1250' => 'WINDOWS-1250',
3961
            'CP1251'      => 'WINDOWS-1251',
3962
            'WIN1251'     => 'WINDOWS-1251',
3963
            'WINDOWS1251' => 'WINDOWS-1251',
3964
            'CP1252'      => 'WINDOWS-1252',
3965
            'WIN1252'     => 'WINDOWS-1252',
3966
            'WINDOWS1252' => 'WINDOWS-1252',
3967
            'CP1253'      => 'WINDOWS-1253',
3968
            'WIN1253'     => 'WINDOWS-1253',
3969
            'WINDOWS1253' => 'WINDOWS-1253',
3970
            'CP1254'      => 'WINDOWS-1254',
3971
            'WIN1254'     => 'WINDOWS-1254',
3972
            'WINDOWS1254' => 'WINDOWS-1254',
3973
            'CP1255'      => 'WINDOWS-1255',
3974
            'WIN1255'     => 'WINDOWS-1255',
3975
            'WINDOWS1255' => 'WINDOWS-1255',
3976
            'CP1256'      => 'WINDOWS-1256',
3977
            'WIN1256'     => 'WINDOWS-1256',
3978
            'WINDOWS1256' => 'WINDOWS-1256',
3979
            'CP1257'      => 'WINDOWS-1257',
3980
            'WIN1257'     => 'WINDOWS-1257',
3981
            'WINDOWS1257' => 'WINDOWS-1257',
3982
            'CP1258'      => 'WINDOWS-1258',
3983
            'WIN1258'     => 'WINDOWS-1258',
3984
            'WINDOWS1258' => 'WINDOWS-1258',
3985
            'UTF16'       => 'UTF-16',
3986
            'UTF32'       => 'UTF-32',
3987
            'UTF8'        => 'UTF-8',
3988
            'UTF'         => 'UTF-8',
3989
            'UTF7'        => 'UTF-7',
3990
            '8BIT'        => 'CP850',
3991
            'BINARY'      => 'CP850',
3992
        ];
3993
3994 5
        if (!empty($equivalences[$encodingUpperHelper])) {
3995 4
            $encoding = $equivalences[$encodingUpperHelper];
3996
        }
3997
3998 5
        $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;
3999
4000 5
        return $encoding;
4001
    }
4002
4003
    /**
     * Standardize line endings to unix-like "\n".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input with every "\r\n" and every remaining "\r" replaced by "\n"
     */
    public static function normalize_line_ending(string $str): string
    {
        // Order matters: "\r\n" is collapsed first, so the lone "\r" pass
        // cannot turn one Windows line break into two "\n".
        $lineBreaks = ["\r\n", "\r"];
        $normalized = \str_replace($lineBreaks, "\n", $str);

        return (string) $normalized;
    }
4014
4015
    /**
     * Normalize some MS Word special characters.
     *
     * The search/replace pairs come from the "utf8_msword" data set, which is
     * loaded once and then kept in static caches.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        static $searchCache = null;
        static $replaceCache = null;

        if ($str === '') {
            return '';
        }

        if ($searchCache === null) {
            // lazy-load the mapping table on first use
            if (self::$UTF8_MSWORD === null) {
                self::$UTF8_MSWORD = self::getData('utf8_msword');
            }

            $searchCache = \array_keys(self::$UTF8_MSWORD);
            $replaceCache = \array_values(self::$UTF8_MSWORD);
        }

        return \str_replace($searchCache, $replaceCache, $str);
    }
4042
4043
    /**
     * Normalize the whitespace.
     *
     * Every entry of the whitespace table is replaced by a plain space; the
     * bidirectional control characters are removed unless they should be kept.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        static $whitespaceLists = [];
        static $bidiControls = null;

        if ($str === '') {
            return '';
        }

        // one cached list per value of $keepNonBreakingSpace (slot 0 or 1)
        $slot = (int) $keepNonBreakingSpace;
        if (!isset($whitespaceLists[$slot])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $whitespaceLists[$slot] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($bidiControls === null) {
                $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($bidiControls, '', $str);
        }

        return \str_replace($whitespaceLists[$slot], ' ', $str);
    }
4084
4085
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-call static cache, the preloaded "ord" table,
     * IntlChar::ord() (when available) and finally manual decoding of the
     * first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 when no byte could be read from the input
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $codePointCache = [];

        $char = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache key is built from the untouched input plus the normalized encoding
        $key = $char . $encoding;
        if (isset($codePointCache[$key]) === true) {
            return $codePointCache[$key];
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // hits in the static table are returned directly (not copied into the cache)
        if (isset(self::$ORD[$char])) {
            return self::$ORD[$char];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($char);
            if ($intlCode) {
                return $codePointCache[$key] = $intlCode;
            }
        }

        // manual fallback: look at the raw bytes (1-indexed array from unpack)
        $bytes = \unpack('C*', (string) self::substr($char, 0, 4, 'CP850'));
        $lead = $bytes ? $bytes[1] : 0;

        // 4-byte sequence
        if ($lead >= 0xF0 && isset($bytes[4])) {
            return $codePointCache[$key] = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // 3-byte sequence
        if ($lead >= 0xE0 && isset($bytes[3])) {
            return $codePointCache[$key] = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // 2-byte sequence
        if ($lead >= 0xC0 && isset($bytes[2])) {
            return $codePointCache[$key] = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // single byte (or 0 when nothing could be unpacked)
        return $codePointCache[$key] = $lead;
    }
4162
4163
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()" this method never (re-)places variables in the current scope,
     *          the result is always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              <strong>false</strong> if the string could not be parsed or $result ended up empty
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: fall back to the native parser
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return !empty($result);
        }

        $parsed = \mb_parse_str($str, $result);
        if ($parsed === false) {
            return false;
        }

        return !empty($result);
    }
4199
4200
    /**
     * Checks if the "/u" pattern modifier is available, which enables Unicode support in PCRE.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // An empty pattern with the "u" modifier against an empty subject:
        // warnings are silenced and any failure casts to false.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return (bool) $probe;
    }
4212
4213
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is interpreted in this order: decimal digits, hexadecimal
     * digits, otherwise as a character whose code point is taken.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is not available
     *
     * @return string[]
     *                  an empty array if a boundary is empty or resolves to code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // shared resolver for both boundaries
        $toCodePoint = static function ($boundary): int {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit((string) $boundary)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($boundary)) {
                return (int) self::hex_to_int($boundary);
            }

            return self::ord($boundary);
        };

        $first = $toCodePoint($var1);
        if (!$first) {
            return [];
        }

        $last = $toCodePoint($var2);
        if (!$last) {
            return [];
        }

        $chars = [];
        foreach (\range($first, $last) as $codePoint) {
            $chars[] = (string) self::chr($codePoint);
        }

        return $chars;
    }
4268
4269
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // "%uXXXX" escapes are rewritten into hexadecimal html entities
        $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscape, $str)) {
            $str = (string) \preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
        }

        if ($multi_decode !== true) {
            return $str;
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // repeat the decode chain until one pass no longer changes the string
        do {
            $before = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entityDecoded = self::html_entity_decode(self::to_utf8($str), $flags);
            $str = self::fix_simple_utf8(\rawurldecode($entityDecoded));
        } while ($before !== $str);

        return $str;
    }
4321
4322
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The "u" modifier is always added to the final expression; the special
     * option string "msr" is passed on as "ms".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
    {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty delimiter
        $wrap = !$delimiter ? '/' : $delimiter;

        $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4350
4351
    /**
     * Alias of "UTF8::remove_bom()".
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string whatever "UTF8::remove_bom()" returns for $str
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4366
4367
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked in turn against the (possibly
     * already shortened) string, so more than one leading BOM can be removed.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = self::strlen_in_byte($str);

        foreach (self::$BOM as $bom => $bomBytes) {
            // only a BOM at the very beginning counts
            if (self::strpos_in_byte($str, $bom, 0) !== 0) {
                continue;
            }

            $stripped = self::substr_in_byte($str, $bomBytes, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= (int) $bomBytes;
            $str = (string) $stripped;
        }

        return $str;
    }
4396
4397
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "  " for " ") is collapsed
     * into a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                // an empty needle can only produce empty matches -> nothing to collapse
                if ($item === '') {
                    continue;
                }

                // A callback returns the needle literally. Passing it as a
                // replacement string would interpret "$0", "$1", "\1", ...
                // inside the needle as back-references.
                $str = (string) \preg_replace_callback(
                    '/(?:' . \preg_quote($item, '/') . ')+/',
                    static function (array $match) use ($item) {
                        return $item;
                    },
                    $str
                );
            }
        }

        return $str;
    }
4420
4421
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, in the format
     *                              "strip_tags()" expects. Default: '' (strip everything)</p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $stripped = \strip_tags($str, $allowableTags);

        return $stripped;
    }
4435
4436
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * "\r\n" counts as ONE break (it is replaced once, not twice), and only
     * real "<br ...>" tags are matched, not other tags that merely start
     * with "br".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" must come first so a Windows line break is consumed as a unit;
        // "\b" keeps tags like "<broom>" untouched, "[^>]*" covers attributes and "/".
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
4448
4449
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * NOTE: the replacement is repeated until nothing matches anymore, so a
     *       $replacement that itself contains a matching sequence never terminates.
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded forms ("%00", "%1F", ...).</p>
     * @param string $replacement [optional] <p>The string used instead of each removed sequence.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        if ($url_encoded) {
            // hex digits in percent-encoding are case-insensitive ("%0b" === "%0B")
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
            $non_displayables[] = '/%7f/i'; // url encoded 127
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // removing one sequence can create a new one (e.g. "%0%000"), so loop until stable
        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4482
4483
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (!self::str_starts_with($str, $substring)) {
            return $str;
        }

        // skip as many characters as the prefix is long, keep the rest
        $prefixLength = (int) self::strlen($substring, $encoding);

        return (string) self::substr($str, $prefixLength, null, $encoding);
    }
4505
4506
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (!self::str_ends_with($str, $substring)) {
            return $str;
        }

        $keepLength = (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding);

        // The lengths above are measured in $encoding, so the cut has to use
        // the same encoding (as "remove_left()" does); otherwise the default
        // encoding of "substr()" would be applied to a differently measured length.
        return (string) self::substr($str, 0, $keepLength, $encoding);
    }
4527
4528
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Delegates to "UTF8::str_replace()" or, for case-insensitive matching,
     * to "UTF8::str_ireplace()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4546
4547
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * Delegates to "UTF8::str_replace()" or, for case-insensitive matching,
     * to "UTF8::str_ireplace()".
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4565
4566
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character (or string); '' removes the chars.</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" only accepts "none" / "long" / "entity"
            // or an integer code point, never the raw replacement string.
            // Invalid sequences are therefore turned into U+FFFD first and the
            // final "str_replace()" below swaps U+FFFD for $replacementChar.
            $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

            // always fallback via symfony polyfill
            $save = \mb_substitute_character();
            try {
                \mb_substitute_character($substitute);
                $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // never leave the global substitute character modified
                \mb_substitute_character($save);
            }

            // the polyfill maybe return false
            /** @psalm-suppress RedundantCondition */
            $str = \is_string($strTmp) ? $strTmp : '';
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacementChar,
                $replacementChar,
            ],
            $str
        );
    }
4620
4621
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * Without $chars every trailing separator (\pZ) and "other" (\pC, e.g.
     * control) character is stripped.
     *
     * @param string $str   <p>The string to be trimmed.</p>
     * @param mixed  $chars <p>Optional characters to be stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // "not set" is checked explicitly: a plain truthiness test would also
        // treat the perfectly valid char list "0" as missing.
        $useDefault = $chars === \INF
                      || $chars === null
                      || $chars === false
                      || $chars === ''
                      || $chars === [];

        // "\z" anchors at the very end; "$" would also match in front of a
        // final "\n" and strip characters that are not trailing at all.
        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefault) {
            $pattern = '[\pZ\pC]+\z';
        } else {
            $pattern = '[' . \preg_quote((string) $chars, '/') . ']+\z';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4645
4646
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes one "key - value" line per entry of the support table, wrapped
     * in a "<pre>" element.
     */
    public static function showSupport()
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        echo '<pre>';

        foreach (self::$SUPPORT as $feature => $state) {
            echo $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '</pre>';
    }
4662
4663
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        if ($keepAsciiChars === true) {
            // ASCII input is passed through untouched on request
            if (self::is_ascii($char) === true) {
                return $char;
            }
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4692
4693
    /**
     * Replace every run of $tabLength consecutive spaces with one tab.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string the converted string; $str unchanged if $tabLength is not positive
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // A non-positive length has no meaningful "run of spaces": 0 would
        // search for an empty string and a negative value is rejected by
        // "str_repeat()" itself.
        if ($tabLength <= 0) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
    }
4703
4704
    /**
     * Convert a string to an array of Unicode characters.
     *
     * @param int|int[]|string|string[] $str       <p>The string to split into array. An array is split
     *                                             element by element and returned with the same keys.</p>
     * @param int                       $length    [optional] <p>Max character length of each array element.</p>
     * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; an empty array for an empty
     *               input or a $length smaller than 1.</p>
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // Split each element with the very same settings. $cleanUtf8 has to be
            // forwarded, otherwise the strings inside an array are never cleaned.
            foreach ($str as $key => $value) {
                $str[$key] = self::split($value, $length, $cleanUtf8);
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        // init
        $ret = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['pcre_utf8'] === true) {
            // "/u" makes "." match one code point, "/s" lets it match line breaks as well
            \preg_match_all('/./us', $str, $retArray);
            if (isset($retArray[0])) {
                $ret = $retArray[0];
            }
            unset($retArray);
        } else {
            // fallback: walk the bytes and group them by the length announced in the lead byte;
            // a byte that does not start a complete, well-formed sequence is skipped
            $len = self::strlen_in_byte($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue the single characters back together in groups of $length
            $ret = \array_chunk($ret, $length);

            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
4827
4828
    /**
     * Returns a camelCase version of the string: surrounding whitespace is trimmed,
     * the first character is lower-cased, the character following a run of dashes,
     * underscores or whitespace is upper-cased (and the run itself is dropped), and
     * the character following a run of digits is upper-cased as well.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // one place for the upper-casing, so both callbacks below use identical settings
        $toUpper = static function (string $chars) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength): string {
            return self::strtoupper($chars, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        };

        $camel = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // leading dashes / underscores never become part of the result
        $camel = (string) \preg_replace('/^[-_]+/', '', $camel);

        // drop each separator run and upper-case the character right behind it (if there is one)
        $camel = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return isset($match[1]) ? $toUpper($match[1]) : '';
            },
            $camel
        );

        // digits stay in place, the character right behind them gets upper-cased
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return $toUpper($match[0]);
            },
            $camel
        );
    }
4876
4877
    /**
     * Capitalizes a name: whitespace is collapsed first, then
     * str_capitalize_name_helper() is applied to the space separated parts
     * and afterwards to the dash separated parts.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $name = self::collapse_whitespace($str);

        // same helper, one pass per word delimiter (order matters: spaces first)
        foreach ([' ', '-'] as $wordDelimiter) {
            $name = self::str_capitalize_name_helper($name, $wordDelimiter);
        }

        return $name;
    }
4893
4894
    /**
     * Tells whether $needle occurs in $haystack. The comparison is case-sensitive
     * unless $caseSensitive is set to false. An empty haystack or an empty needle
     * always yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        // (a non-boolean third argument is taken as the encoding)
        /** @psalm-suppress RedundantConditionGivenDocblockType */
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        $position = $caseSensitive
            ? self::strpos($haystack, $needle, 0, $encoding)
            : self::stripos($haystack, $needle, 0, $encoding);

        return $position !== false;
    }
4924
4925
    /**
     * Tells whether $haystack contains every one of the $needles. The comparison
     * is case-sensitive unless $caseSensitive is set to false. An empty haystack
     * or an empty list of needles yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains all $needles
     */
    public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        // (a non-boolean third argument is taken as the encoding)
        /** @psalm-suppress RedundantConditionGivenDocblockType */
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        foreach ($needles as $needle) {
            // the first missing needle settles the answer
            if (self::str_contains($haystack, $needle, $caseSensitive, $encoding) === false) {
                return false;
            }
        }

        return true;
    }
4961
4962
    /**
     * Tells whether $haystack contains at least one of the $needles. The comparison
     * is case-sensitive unless $caseSensitive is set to false. An empty list of
     * needles yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool
     *              Whether or not $haystack contains any of the $needles
     */
    public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // the first hit settles the answer
            if (self::str_contains($haystack, $needle, $caseSensitive, $encoding) === true) {
                return true;
            }
        }

        return false;
    }
4989
4990
    /**
     * Dash-separated variant of str_delimit(): the string is handed to
     * str_delimit() with "-" as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5004
5005
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * A separator is placed before every ASCII uppercase letter (A-Z) that is not
     * at a word boundary, and every run of spaces, dashes and underscores is
     * replaced by the delimiter. The delimiter itself is inserted verbatim.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string. It is treated as
     *                                           literal text, "$1" or "\1" are not expanded.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = self::trim($str);

        // mark the inner uppercase letters with a dash, the dash is turned into the delimiter below
        $str = (string) \preg_replace('/\B([A-Z])/u', '-\1', $str);

        $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // The delimiter is returned from a callback instead of being passed as a
        // replacement string, so that sequences like "$1" or "\1" inside of it are
        // not interpreted as backreferences.
        return (string) \preg_replace_callback(
            '/[-_\s]+/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($delimiter): string {
                return $delimiter;
            },
            $str
        );
    }
5038
5039
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary data (UTF-16 / UTF-32 only), ASCII,
     * UTF-8, "mb_detect_encoding()" and finally an "iconv()" round-trip.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            $utf16Type = self::is_utf16($str, false);
            if ($utf16Type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16Type === 2) {
                return 'UTF-16BE';
            }

            $utf32Type = self::is_utf32($str, false);
            if ($utf32Type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32Type === 2) {
                return 'UTF-32BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // candidates are tried in exactly this order
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // an encoding is accepted if converting the string to itself leaves it untouched
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5164
5165
    /**
     * Check if the string ends with the given substring (byte-wise, case-sensitive).
     * An empty haystack or an empty needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // cut off as many trailing bytes as the needle is long and compare them
        $tail = \substr($haystack, -\strlen($needle));

        return $tail === $needle;
    }
5181
5182
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            // the first matching ending settles the answer
            if (self::str_ends_with($str, $candidate) === true) {
                return true;
            }
        }

        return false;
    }
5206
5207
    /**
     * Ensures that the string begins with $substring: it is prepended
     * whenever str_starts_with() does not report a match.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        return self::str_starts_with($str, $substring)
            ? $str
            : $substring . $str;
    }
5224
5225
    /**
     * Ensures that the string ends with $substring: it is appended
     * whenever str_ends_with() does not report a match.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        return self::str_ends_with($str, $substring)
            ? $str
            : $str . $substring;
    }
5241
5242
    /**
     * Makes an identifier readable: every "_id" is removed, the remaining
     * underscores become spaces, the result is trimmed and passed to ucfirst().
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // search => replacement, "_id" has to be handled before the plain underscore
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $readable = self::str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        return self::ucfirst(self::trim($readable));
    }
5266
5267
    /**
     * Check if the string ends with the given substring, case insensitive.
     * An empty haystack or an empty needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // the tail is cut by the byte length of the needle, the comparison is done by strcasecmp()
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5283
5284
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            // the first matching ending settles the answer
            if (self::str_iends_with($str, $candidate) === true) {
                return true;
            }
        }

        return false;
    }
5308
5309
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. This is an alias that forwards all arguments to stripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5331
5332
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. This is an alias that forwards all arguments to strripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5355
5356
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. This is an alias that forwards all arguments to strpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5378
5379
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. This is an alias that forwards all arguments to strrpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5402
5403
    /**
     * Inserts $substring into the string at the $index provided. The string is
     * returned unchanged if $index lies behind its end.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
    {
        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        // everything in front of the insert position ...
        $head = (string) self::substr($str, 0, $index, $encoding);
        // ... and everything from the insert position onwards
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5425
5426
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * The search strings are turned into quoted, case-insensitive UTF-8 patterns
     * and applied via "preg_replace()". The replacements are literal text: "$" and
     * "\" are escaped, so that e.g. "$10" or "\1" are inserted as they are instead
     * of being expanded as backreferences.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search string never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement, or an array of replacements
     *                       (one per search string).
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     *               (whatever "preg_replace()" returns, i.e. null if the regex engine reports an error)
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // build one pattern per search string, the keys are kept
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // this pattern can not match, so an empty search string replaces nothing
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // "preg_replace()" would read "$n", "${n}" and "\n" in a replacement as
        // backreferences and "\\" as a single backslash -> escape both characters
        $escape = static function ($replacement): string {
            return \addcslashes((string) $replacement, '\\$');
        };
        $replacements = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replacements, $subject, -1, $count);
    }
5470
5471
    /**
     * Replaces $search at the beginning of the string with $replacement
     * (the comparison is done by "stripos()", i.e. case-insensitive).
     *
     * INFO: an empty $search appends $replacement to the string.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // nothing to search for: the replacement is attached to the end
        // (for an empty $str this is simply the replacement itself)
        if ($search === '') {
            return $str . $replacement;
        }

        $startsWithSearch = \stripos($str, $search) === 0;
        if ($startsWithSearch === false) {
            return $str;
        }

        $remainder = \substr($str, \strlen($search));

        return $replacement . $remainder;
    }
5502
5503
    /**
     * Replaces $search at the ending of the string with $replacement
     * (the comparison is done by "stripos()", i.e. case-insensitive).
     *
     * INFO: an empty $search appends $replacement to the string. A $search that
     * is longer than the string can not match, the string is returned as it is.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // nothing to search for: the replacement is attached to the end
        // (for an empty $str this is simply the replacement itself)
        if ($search === '') {
            return $str . $replacement;
        }

        // byte position at which $search has to start in order to be the ending
        $offset = \strlen($str) - \strlen($search);

        // A negative offset means that $search is longer than $str. Passing it on
        // would let "stripos()" count from the end of the string and, as soon as
        // the offset lies outside of the string, throw a ValueError (PHP >= 8).
        if ($offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5534
5535
    /**
     * Check if the string starts with the given substring, case insensitive.
     * An empty haystack or an empty needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // a match only counts if it sits at the very first position
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
5551
5552
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            // the first matching beginning settles the answer
            if (self::str_istarts_with($str, $candidate) === true) {
                return true;
            }
        }

        return false;
    }
5580
5581
    /**
     * Gets the substring after the first occurrence of a separator
     * (the separator is searched case-insensitive).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     *                The part behind the separator, or an empty string if the input or the
     *                separator is empty or the separator was not found
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if (
            $separator === ''
            ||
            $str === ''
        ) {
            return '';
        }

        // the index has to be determined with the same encoding that is used
        // for strlen() and substr() below, otherwise the positions do not fit
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5612
5613
    /**
     * Gets the substring after the last occurrence of a separator,
     * where the occurrence is located with self::str_iindex_last().
     *
     * NOTE(review): $encoding is used for the length / substring calls only, it is not
     * forwarded to self::str_iindex_last() - confirm that this is intended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The text following the separator, or an empty string if an argument
     *                is empty or the separator was not found.</p>
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $separatorPos = self::str_iindex_last($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // the result starts right behind the separator
        $startPos = $separatorPos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $startPos, null, $encoding);
    }
5644
5645
    /**
     * Gets the substring before the first occurrence of a separator,
     * where the occurrence is located with self::str_iindex_first().
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The text in front of the separator, or an empty string if an argument
     *                is empty or the separator was not found.</p>
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $separatorPos = self::str_iindex_first($str, $separator);

        // the separator position doubles as the length of the leading part
        return $separatorPos === false
            ? ''
            : (string) self::substr($str, 0, $separatorPos, $encoding);
    }
5671
5672
    /**
     * Gets the substring before the last occurrence of a separator,
     * where the occurrence is located with self::str_iindex_last().
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The text in front of the separator, or an empty string if an argument
     *                is empty or the separator was not found.</p>
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $separatorPos = self::str_iindex_last($str, $separator);

        // the separator position doubles as the length of the leading part
        return $separatorPos === false
            ? ''
            : (string) self::substr($str, 0, $separatorPos, $encoding);
    }
5698
5699
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * Thin wrapper around self::stristr() that maps its "false" result to an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The part returned by self::stristr(), or an empty string if an argument
     *                is empty or nothing was found.</p>
     */
    public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
5731
5732
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * Thin wrapper around self::strrichr() that maps its "false" result to an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The part returned by self::strrichr(), or an empty string if an argument
     *                is empty or nothing was found.</p>
     */
    public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
5759
5760
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The trailing characters, or an empty string when $n is zero or negative.</p>
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // a negative start offset makes self::substr() count from the end of the string
        return $n > 0
            ? (string) self::substr($str, -$n, null, $encoding)
            : '';
    }
5777
5778
    /**
     * Limit the number of characters in a string.
     *
     * A string longer than $length is cut so that the kept part plus $strAddOn is
     * $length characters long. If $strAddOn itself is longer than $length, nothing of
     * $str is kept and only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>An empty string for empty input or a non-positive $length, the unchanged
     *                input if it already fits, otherwise the shortened string with $strAddOn appended.</p>
     */
    public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the input, and never let the kept
        // length drop below zero: a negative length would make substr() cut from the
        // end instead, producing a result longer than the requested limit.
        $keepLength = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        return (string) self::substr($str, 0, $keepLength, $encoding) . $strAddOn;
    }
5804
5805
    /**
     * Limit the number of characters in a string, cutting at a word boundary (a space) where possible.
     *
     * - If the character at index "$length - 1" is a space, the first "$length - 1" characters are kept.
     * - Otherwise the string is cut to $length characters and everything from the last space on is dropped.
     * - If nothing is left after that, the first "$length - 1" characters of the cut string are kept.
     *
     * $strAddOn is appended in all three cases.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>An empty string for empty input or a non-positive $length, the unchanged
     *                input if it already fits, otherwise the shortened string plus $strAddOn.</p>
     */
    public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the limit falls directly on a space: cut right in front of it
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return $strAddOn;
        }

        // drop the last (possibly incomplete) word
        $words = \explode(' ', $truncated);
        $withoutLastWord = \implode(' ', \array_slice($words, 0, -1));

        if ($withoutLastWord !== '') {
            return $withoutLastWord . $strAddOn;
        }

        // nothing is left in front of the last word: fall back to a hard cut
        return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $strAddOn;
    }
5848
5849
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start; the comparison
     * stops at the first difference or at the end of the shorter string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The shared leading characters (may be empty).</p>
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $limit = \min(
            (int) self::strlen($str, $encoding),
            (int) self::strlen($otherStr, $encoding)
        );

        $prefix = '';
        for ($pos = 0; $pos < $limit; ++$pos) {
            $char = self::substr($str, $pos, 1, $encoding);

            if ($char === false || $char !== self::substr($otherStr, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
5879
5880
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses the dynamic programming approach described at
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The longest run of characters found in both strings, taken from $str;
     *                an empty string if either input is empty or nothing is shared.</p>
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $strLength = (int) self::strlen($str, $encoding);
        $otherLength = (int) self::strlen($otherStr, $encoding);

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Extract the characters of $otherStr once, instead of once per cell of the table.
        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[$j] = self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // index in $str just behind the best match

        // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1];
        // cells are pre-filled with 0, so only matching cells need to be written
        $table = \array_fill(
            0,
            $strLength + 1,
            \array_fill(0, $otherLength + 1, 0)
        );

        for ($i = 1; $i <= $strLength; ++$i) {
            // does not depend on $j, so it is fetched once per row
            $strChar = self::substr($str, $i - 1, 1, $encoding);

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar !== $otherChars[$j - 1]) {
                    continue;
                }

                $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

                // strictly greater: on ties the earliest match wins
                if ($table[$i][$j] > $len) {
                    $len = $table[$i][$j];
                    $end = $i;
                }
            }
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
5929
5930
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are compared character by character from the end; the comparison
     * stops at the first difference or when the shorter string is exhausted.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The shared trailing characters (may be empty).</p>
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $limit = \min(
            (int) self::strlen($str, $encoding),
            (int) self::strlen($otherStr, $encoding)
        );

        $suffix = '';
        for ($fromEnd = 1; $fromEnd <= $limit; ++$fromEnd) {
            // negative offsets address characters counted from the end
            $char = self::substr($str, -$fromEnd, 1, $encoding);

            if ($char === false || $char !== self::substr($otherStr, -$fromEnd, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
        }

        return $suffix;
    }
5960
5961
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is inserted as-is between "/" delimiters and evaluated with the
     * "u" (UTF-8) modifier, so it must not contain its own delimiters or modifiers.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
5973
5974
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        // valid range: 0 ... length - 1 from the start, -1 ... -length from the end
        return $offset >= 0
            ? $offset < $length
            : \abs($offset) <= $length;
    }
5996
5997
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string <p>The character at the specified index, as returned by self::char_at().</p>
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // measure the string in the requested encoding, the same one char_at() is called with
        $length = (int) self::strlen($str, $encoding);

        // valid range: 0 ... length - 1 from the start, -1 ... -length from the end
        $isOutOfBounds = $index >= 0
            ? $index >= $length
            : \abs($index) > $length;

        if ($isOutOfBounds) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6026
6027
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * The input is returned unchanged if $pad_length is not positive, if it is smaller
     * than the length of $str, or if $pad_string is empty (there is nothing to pad with).
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: UTF-8</p>
     *
     * @throws \InvalidArgumentException if $pad_type is a non-integer value other than 'left', 'right' or 'both'
     *
     * @return string <p>Returns the padded string; an empty input string stays empty.</p>
     */
    public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = \STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        $str_length = (int) self::strlen($str, $encoding);

        if ($pad_length <= 0 || $pad_length < $str_length) {
            return $str;
        }

        $ps_length = (int) self::strlen($pad_string, $encoding);
        if ($ps_length === 0) {
            // an empty pad string would otherwise end up as a division by zero below
            return $str;
        }

        $diff = $pad_length - $str_length;

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                $pre = (string) self::substr($pre, 0, $diff, $encoding);
                $post = '';

                break;

            case \STR_PAD_BOTH:
                // each side gets (at most) half of the padding; the right side
                // takes the extra character when $diff is odd
                $half = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                $pre = (string) self::substr($half, 0, (int) \floor($diff / 2), $encoding);
                $post = (string) self::substr($half, 0, (int) \ceil($diff / 2), $encoding);

                break;

            case \STR_PAD_RIGHT:
            default:
                $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                $post = (string) self::substr($post, 0, $diff, $encoding);
                $pre = '';
        }

        return $pre . $str . $post;
    }
6101
6102
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. The missing length is split in half, with the right
     * side receiving the extra character for odd amounts; the actual padding
     * is done by self::apply_padding().
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>String with padding applied.</p>
     */
    public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        $missing = $length - (int) self::strlen($str, $encoding);

        $padLeft = (int) \floor($missing / 2);
        $padRight = (int) \ceil($missing / 2);

        return self::apply_padding($str, $padLeft, $padRight, $padStr, $encoding);
    }
6119
6120
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. The actual padding is done by self::apply_padding().
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>String with left padding.</p>
     */
    public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // measure $str in the requested encoding, as str_pad_both() does
        $missing = $length - (int) self::strlen($str, $encoding);

        return self::apply_padding($str, $missing, 0, $padStr, $encoding);
    }
6135
6136
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. The actual padding is done by self::apply_padding().
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>String with right padding.</p>
     */
    public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // measure $str in the requested encoding, as str_pad_both() does
        $missing = $length - (int) self::strlen($str, $encoding);

        return self::apply_padding($str, 0, $missing, $padStr, $encoding);
    }
6151
6152
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() first and then handed to the
     * native "str_repeat()".
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6176
6177
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     * All arguments are forwarded unchanged to the native function.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed <p>A string or an array with the replaced values.</p>
     */
    public static function str_replace($search, $replace, $subject, int &$count = null)
    {
        /** @psalm-suppress PossiblyNullArgument */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6211
6212
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive.
     *
     * NOTE(review): with an empty $search the replacement is appended to $str
     * (not prepended) - confirm that this is the intended behaviour.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string <p>The string after the replacement; the unchanged input if it
     *                does not start with $search.</p>
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // nothing to look for: the replacement is simply added to the input
        // (for an empty input this yields the replacement itself)
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        $startsWithSearch = \strncmp($str, $search, $searchLength) === 0;
        if (!$startsWithSearch) {
            return $str;
        }

        return $replacement . \substr($str, $searchLength);
    }
6243
6244
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. A $search that is longer
     * than $str can never match and leaves the input untouched.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string <p>The string after the replacement; the unchanged input if it
     *                does not end with $search.</p>
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // nothing to look for: the replacement is simply appended
        // (for an empty input this yields the replacement itself)
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Guard against a search term longer than the input: looking it up with an
        // offset of "strlen($str) - strlen($search)" would place the offset outside
        // of the string, which raises a warning (PHP 7) or a ValueError (PHP 8).
        if ($searchLength > \strlen($str)) {
            return $str;
        }

        if (\substr($str, -$searchLength) !== $search) {
            return $str;
        }

        return \substr($str, 0, -$searchLength) . $replacement;
    }
6275
6276
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The position is taken from self::strpos(), the replacement itself is done
     * by self::substr_replace().
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string <p>The subject with its first match replaced, or the unchanged
     *                subject if $search was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);
        if ($firstPos === false) {
            return $subject;
        }

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $firstPos, (int) self::strlen($search));
    }
6297
6298
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The position is taken from self::strrpos(), the replacement itself is done
     * by self::substr_replace().
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string <p>The subject with its last match replaced, or the unchanged
     *                subject if $search was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(string $search, string $replace, string $subject): string
    {
        $lastPos = self::strrpos($subject, $search);
        if ($lastPos === false) {
            return $subject;
        }

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $lastPos, (int) self::strlen($search));
    }
6319
6320
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str <p>The input string</p>
     *
     * @return string <p>The shuffled string; an empty string for empty input.</p>
     */
    public static function str_shuffle(string $str): string
    {
        $length = (int) self::strlen($str);

        // Without this guard an empty string would build range(0, -1), i.e. the
        // two bogus indexes [0, -1], and only work because both lookups come back empty.
        if ($length === 0) {
            return '';
        }

        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        $shuffledStr = '';
        // the indexes are only read, so they are iterated by value (no dangling reference)
        foreach ($indexes as $i) {
            $tmpSubStr = self::substr($str, $i, 1);
            if ($tmpSubStr !== false) {
                $shuffledStr .= $tmpSubStr;
            }
        }

        return $shuffledStr;
    }
6345
6346
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string if a non-negative $end
     *                      is not behind $start.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
    {
        // All lengths are measured in the requested encoding, the same one the
        // final substr() call works with.
        if ($end === null) {
            $length = (int) self::strlen($str, $encoding);
        } elseif ($end >= 0 && $end <= $start) {
            return '';
        } elseif ($end < 0) {
            // a negative $end counts backwards from the end of the string
            $length = (int) self::strlen($str, $encoding) + $end - $start;
        } else {
            $length = $end - $start;
        }

        return self::substr($str, $start, $length, $encoding);
    }
6375
6376
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized, "-" becomes "_", every ASCII digit is wrapped
     * in "_" and every ASCII upper-case letter is lower-cased and prefixed with "_";
     * finally whitespace runs become "_", repeated "_" are collapsed and leading /
     * trailing "_" and whitespace are trimmed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>String in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        $str = self::normalize_whitespace($str);
        $str = \str_replace('-', '_', $str);

        $str = (string) \preg_replace_callback(
            // digits and upper-case letters only; a "|" inside a character class would be
            // a literal pipe (not an alternation) and would wrongly receive a "_" prefix
            '/([\dA-Z])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];

                // a digit survives the round-trip through int unchanged, a letter does not
                if ((string) (int) $match === $match) {
                    return '_' . $match . '_';
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // No separate "trim whitespace" pattern is needed here: after the first
        // pattern has run there is no whitespace left for it to match.
        $str = (string) \preg_replace(
            [
                '/\s+/', // convert spaces to "_"
                '/_+/',  // remove double "_"
            ],
            '_',
            $str
        );

        $str = self::trim($str, '_'); // trim leading & trailing "_"

        return self::trim($str); // trim leading & trailing whitespace
    }
6427
6428
    /**
     * Sort all characters according to code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses duplicate values (a value can only be one key)
            $flipped = \array_flip($codePoints);
            $codePoints = \array_flip($flipped);
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
6453
6454
    /**
     * alias for "UTF8::split()"
     *
     * @see UTF8::split()
     *
     * @param string|string[] $str
     * @param int             $len
     *
     * @return string[]
     */
    public static function str_split($str, int $len = 1): array
    {
        // pure delegation: both arguments are forwarded untouched
        $chunks = self::split($str, $len);

        return $chunks;
    }
6468
6469
    /**
     * Splits the string at every occurrence of $pattern and returns the pieces
     * as an array of strings. An optional integer $limit will truncate the
     * results.
     *
     * INFO: $pattern is passed through preg_quote(), so it is matched as
     * literal text and not interpreted as a regular expression.
     *
     * @param string $str
     * @param string $pattern <p>The separator with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        // zero results requested
        if ($limit === 0) {
            return [];
        }

        // an empty separator never splits: the input is the only element
        if ($pattern === '') {
            return [$str];
        }

        // preg_split() puts the unsplit remainder into the last slot when a
        // limit is given, so ask for one extra slot and drop it afterwards
        $splitLimit = $limit > 0 ? $limit + 1 : -1;

        $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);
        if ($parts === false) {
            return [];
        }

        $remainderPresent = $splitLimit > 0 && \count($parts) === $splitLimit;
        if ($remainderPresent) {
            \array_pop($parts);
        }

        return $parts;
    }
6512
6513
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $haystack or an
     * empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // compare only the leading bytes instead of searching the whole
        // haystack for the needle (strpos() keeps scanning on a miss)
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
6529
6530
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so no reference is needed
        foreach ($substrings as $substring) {
            // cast so scalar non-string entries do not raise a TypeError under strict_types
            if (self::str_starts_with($str, (string) $substring)) {
                return true;
            }
        }

        return false;
    }
6558
6559
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // locate the separator with the same encoding that is used for the
        // length / substring arithmetic below
        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding), // skip the separator itself
            null,
            $encoding
        );
    }
6590
6591
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // locate the separator with the same encoding that is used for the
        // length / substring arithmetic below
        $offset = self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding), // skip the separator itself
            null,
            $encoding
        );
    }
6622
6623
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // locate the separator with the same encoding that substr() uses below
        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // everything in front of the separator
        return (string) self::substr($str, 0, $offset, $encoding);
    }
6654
6655
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // locate the separator with the same encoding that substr() uses below
        $offset = self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // everything in front of the last separator
        return (string) self::substr($str, 0, $offset, $encoding);
    }
6686
6687
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * An empty string is returned when $str or $needle is empty, or when
     * UTF8::strstr() reports no match (false).
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

        // normalise the "not found" marker to an empty string
        return $found === false ? '' : $found;
    }
6719
6720
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * An empty string is returned when $str or $needle is empty, or when
     * UTF8::strrchr() reports no match (false).
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

        // normalise the "not found" marker to an empty string
        return $found === false ? '' : $found;
    }
6747
6748
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        // same wrapper on the left and on the right
        return $substring . $str . $substring;
    }
6760
6761
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of the word lower-cased. The input is trimmed first unless
     * $useTrimFirst is false. Also accepts an array, $ignore, allowing you to
     * list words not to be capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($useTrimFirst === true) {
            $str = self::trim($str);
        }

        $words = self::str_to_words($str);

        // null and an empty list both mean "nothing to skip"
        $hasIgnoreList = !empty($ignore);

        // write back by key instead of iterating by reference, so no
        // reference to the last element lingers after the loop
        foreach ($words as $index => $word) {
            if ($hasIgnoreList && \in_array($word, $ignore, true)) {
                continue;
            }

            $lowered = self::strtolower(
                $word,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );

            $words[$index] = self::str_upper_first(
                $lowered,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );
        }

        // the pieces are glued back together without any separator
        return \implode('', $words);
    }
6816
6817
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * INFO: the entries of $ignore are merged into the built-in "small words"
     * list and inserted into the regular expressions as-is (no escaping).
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = self::trim($str);

        // input without any lower-case letter (e.g. all caps) is lower-cased
        // first, using the same encoding as every later step
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in this pattern tests for "…" although its
        // inline comment talks about a hyphen; the pattern is kept unchanged.
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
6986
6987
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of $str are read as one big-endian number and written in
     * base 2 without leading zeros; an empty or all-zero input yields "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        // Convert byte by byte: base_convert() on the whole hex dump goes
        // through a float and silently loses precision for longer inputs.
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \str_pad(\decbin(\ord($str[$i])), 8, '0', \STR_PAD_LEFT);
        }

        // match the number-style output of base_convert(): no leading zeros,
        // but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7000
7001
    /**
     * Split a string into its lines.
     *
     * INFO: the separator is one or two consecutive CR / LF characters, so
     * "\r\n" counts as one line break - and so does "\n\n".
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "no usable input": nothing at all, or one empty line
        $emptyResult = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $emptyResult;
        }

        $lines = \preg_split("/[\r\n]{1,2}/u", $str);
        if ($lines === false) {
            return $emptyResult;
        }

        // only filter when at least one of the two filters is requested
        if ($removeShortValues !== null || $removeEmptyValues !== false) {
            return self::reduce_string_array(
                $lines,
                $removeEmptyValues,
                $removeShortValues
            );
        }

        return $lines;
    }
7033
7034
    /**
     * Convert a string into an array of words.
     *
     * INFO: the words are used as capturing split delimiters
     * (PREG_SPLIT_DELIM_CAPTURE), so the returned array contains the words as
     * well as the text between them.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        $charList = self::rxClass($charList, '\pL');

        $return = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        // no filter requested: hand back the raw split result
        if ($removeShortValues === null && $removeEmptyValues === false) {
            return $return;
        }

        $tmpReturn = self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );

        // force every entry to string; written back by key so that no
        // reference to the last element is left behind
        foreach ($tmpReturn as $key => $item) {
            $tmpReturn[$key] = (string) $item;
        }

        return $tmpReturn;
    }
7077
7078
    /**
     * alias for "UTF8::to_ascii()"
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        // pure delegation: all arguments are forwarded untouched
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7093
7094
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If $substring is longer than a non-negative $length it cannot fit; the
     * string is then cut to $length and the substring is left out.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        // already short enough: nothing to truncate, nothing to append
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        $substringLength = (int) self::strlen($substring, $encoding);

        // The substring does not fit into the desired length. Subtracting it
        // would give a negative length, which substr() interprets as "stop
        // that many characters before the end" - the result would be longer
        // than requested.
        if ($length >= 0 && $substringLength > $length) {
            return (string) self::substr($str, 0, $length, $encoding);
        }

        // Need to further trim the string so we can append the substring
        $length -= $substringLength;

        return ((string) self::substr($str, 0, $length, $encoding)) . $substring;
    }
7125
7126
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string
    {
        // short enough already: return the input untouched
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the substring that gets appended at the end
        // NOTE(review): a $substring longer than $length makes this negative - confirm intended handling
        $length -= (int) self::strlen($substring, $encoding);

        $cut = self::substr($str, 0, $length, $encoding);
        if ($cut === false) {
            return '';
        }

        // first space found when searching from offset ($length - 1)
        $spaceAfterCut = self::strpos($str, ' ', $length - 1, $encoding);

        // that space sits at index $length, i.e. directly behind the cut:
        // no word was split
        if ($spaceAfterCut === $length) {
            return $cut . $substring;
        }

        // the last word was truncated: find the last space inside the cut
        $lastSpace = self::strrpos($cut, ' ', 0, $encoding);

        $cutBackToSpace = $lastSpace !== false
            || ($spaceAfterCut !== false && $ignoreDoNotSplitWordsForOneWord === false);

        if ($cutBackToSpace) {
            // (int) false === 0, so without a space inside the cut nothing is kept
            $cut = (string) self::substr($cut, 0, (int) $lastSpace, $encoding);
        }

        return $cut . $substring;
    }
7178
7179
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        // UTF8::str_delimit() does the work; "_" is the delimiter
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
7193
7194
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // step 1: camelCase (only the encoding is forwarded to this step)
        $camelized = self::str_camelize($str, $encoding);

        // step 2: upper-case the first character
        return self::str_upper_first(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
7211
7212
    /**
     * Alias for "UTF8::ucfirst()": every argument is forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Forwarded to UTF8::ucfirst().</p>
     * @param bool        $cleanUtf8             [optional] <p>Forwarded to UTF8::ucfirst().</p>
     * @param string|null $lang                  [optional] <p>Forwarded to UTF8::ucfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Forwarded to UTF8::ucfirst().</p>
     *
     * @return string the result of UTF8::ucfirst()
     */
    public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $upperFirst;
    }
7229
7230
    /**
     * Counts the words of an UTF-8 string, or returns the words themselves.
     *
     * The string is split via UTF8::str_to_words(); the words are read from the odd
     * indexes (1, 3, 5, ...) of that result, the parts in between are only used to
     * compute the word offsets for format "2".
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @return int|string[] the number of words (every format except 1 and 2), otherwise the list of words
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // every format other than "1" and "2" only asks for the number of words
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format "2": key every word by its character offset; the first offset is the
        // length of the leading part at index 0
        $position = (int) self::strlen($parts[0]);
        $index = 1;
        while ($index < $partCount) {
            $words[$position] = $parts[$index];
            // advance by the word itself plus the part that follows it
            $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
            $index += 2;
        }

        return $words;
    }
7267
7268
    /**
     * Compares two strings without regard to case.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are case-folded
     * via UTF8::strtocasefold() and then compared with UTF8::strcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
7289
7290
    /**
     * Alias for "UTF8::strstr()": every argument is forwarded unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Forwarded to UTF8::strstr().</p>
     * @param string $encoding      [optional] <p>Forwarded to UTF8::strstr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::strstr().</p>
     *
     * @return false|string the result of UTF8::strstr()
     */
    public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $found;
    }
7307
7308
    /**
     * Case-sensitive string comparison.
     *
     * Byte-identical strings are reported as equal right away. Otherwise both strings
     * are normalized to NFD before they are compared with "\strcmp()", so that
     * canonically equivalent strings (e.g. a pre-composed and a decomposed "é")
     * compare as equal.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @noinspection PhpUndefinedClassInspection */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @noinspection PhpUndefinedClassInspection */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false for input it cannot process (e.g. invalid
        // UTF-8); with strict types, passing that false on to "\strcmp()" would throw a
        // TypeError, so fall back to the raw string instead
        if ($normalized1 === false) {
            $normalized1 = $str1;
        }
        if ($normalized2 === false) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
7327
7328
    /**
     * Finds the length of the initial segment of a string that does not contain any
     * character of the given list.
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $charList <p>The characters that end the segment; an empty list yields the full length of $str.</p>
     * @param int|null $offset   [optional] <p>Start position, handed to UTF8::substr() before searching.</p>
     * @param int|null $length   [optional] <p>Length, handed to UTF8::substr() before searching.</p>
     *
     * @return int the length (in characters) of the leading segment without any listed character
     */
    public static function strcspn(string $str, string $charList, int $offset = null, int $length = null): int
    {
        if ($charList === '') {
            return (int) self::strlen($str);
        }

        // restrict the search to the requested window, if there is one
        $hasWindow = $offset !== null || $length !== null;
        if ($hasWindow) {
            /** @noinspection UnnecessaryCastingInspection */
            $window = self::substr($str, (int) $offset, $length);
            if ($window === false) {
                return 0;
            }
            $str = $window;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first listed character
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            // no listed character at all: the whole string is the segment
            return (int) self::strlen($str);
        }

        $segmentLength = self::strlen($found[1]);

        return $segmentLength === false ? 0 : $segmentLength;
    }
7369
7370
    /**
     * Alias for "UTF8::stristr()": every argument is forwarded unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Forwarded to UTF8::stristr().</p>
     * @param string $encoding      [optional] <p>Forwarded to UTF8::stristr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::stristr().</p>
     *
     * @return false|string the result of UTF8::stristr()
     */
    public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $found;
    }
7387
7388
    /**
     * Creates a string from a list of code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every array element is converted with UTF8::chr() and the results are
     * concatenated in their original order.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $toChar = [self::class, 'chr'];
        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
7410
7411
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is compared against every byte sequence that is registered as a key
     * of the static $BOM map.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys (the BOM byte sequences) are needed here. Iterating over the keys
        // by value avoids binding a by-reference loop variable to the shared static map.
        foreach (\array_keys(self::$BOM) as $bomString) {
            if (\strpos($str, (string) $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
7431
7432
    /**
     * Strips HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped; the value is cast to a
     *                                    string and handed to the native "strip_tags()".
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        // null (no allowed tags) becomes the empty string for the native function
        /** @noinspection UnnecessaryCastingInspection */
        $allowed = (string) $allowable_tags;

        return \strip_tags($input, $allowed);
    }
7465
7466
    /**
     * Removes all whitespace characters from a string.
     *
     * Every run matched by the "[[:space:]]" class (in unicode mode) is deleted. This
     * includes tabs and newline characters and is intended to cover multibyte
     * whitespace such as the thin space and the ideographic space as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string without whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        // "preg_replace()" yields null on error, which is turned into an empty string here
        return (string) $stripped;
    }
7483
7484
    /**
     * Finds the position of the first occurrence of a string within another, case insensitive.
     *
     * The lookup is attempted with mbstring, then with intl ("grapheme_stripos()"), then
     * with the native "stripos()" for pure ASCII input and finally by case-folding both
     * strings and delegating to UTF8::strpos(). A backend that reports "not found" does
     * not end the search; the next one is tried.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // first choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $position = \mb_stripos($haystack, $needle, $offset, $encoding);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback via intl
        //

        $canUseIntl = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                      &&
                      $offset >= 0 // grapheme_stripos() can't handle negative offset
                      &&
                      self::$SUPPORT['intl'] === true;
        if ($canUseIntl) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
7557
7558
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case insensitive).
     *
     * mbstring is used when it is available. Otherwise intl ("grapheme_stristr()"), the
     * native "stristr()" for pure ASCII input and finally a case-insensitive regular
     * expression are tried, in that order.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty at this point if the cleaning removed all of its
        // content. Compare against '' explicitly: a loose "!$needle" check would also
        // treat the valid needle "0" as empty and wrongly return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // first choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle) && self::is_ascii($haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // lazily capture everything in front of the first case-insensitive match
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the part in front of the match and return the rest of the haystack
        return self::substr($haystack, (int) self::strlen($match[1]));
    }
7636
7637
    /**
     * Get the string length, not the byte-length!
     *
     * The length is determined by the first backend that delivers a result: mbstring,
     * iconv, intl ("grapheme_strlen()", UTF-8 only), the native "strlen()" for pure ASCII
     * input and finally a regular expression that counts the characters. The "CP850" and
     * "ASCII" encodings are answered with the byte length.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::strlen_in_byte($str);
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $isUtf8 = $encoding === 'UTF-8';

        // without mbstring and iconv there is no backend for non UTF-8 encodings
        if (
            !$isUtf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $length = \mb_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // count the characters matched by the unicode-aware "any character" pattern
        \preg_match_all('/./us', $str, $chars);
        $length = \count($chars[0]);

        // the string is not empty here, so zero matches means it could not be processed
        return $length === 0 ? false : $length;
    }
7754
7755
    /**
     * Get string length in byte.
     *
     * When the "mbstring_func_overload" support flag is set, "mb_strlen()" with the
     * 8-bit "CP850" encoding is used; otherwise the native "strlen()" is used.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int the number of bytes
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it ...
        $isOverloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        return $isOverloaded
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
7779
7780
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both strings are case-folded
     * via UTF8::strtocasefold() and then compared with UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
7801
7802
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Identical strings are reported as equal right away; otherwise both strings are
     * passed through UTF8::strtonatfold() and compared with the native "strnatcmp()".
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 . '' === $str2 . '') {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
7821
7822
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then compared with
     * UTF8::strncmp().
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
7845
7846
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut down to their first $len characters via UTF8::substr()
     * and the two prefixes are then compared with UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     * @param int    $len  <p>Number of characters to use in the comparison.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(string $str1, string $str2, int $len): int
    {
        // a "false" result of substr() is turned into an empty prefix by the cast
        $prefix1 = (string) self::substr($str1, 0, $len);
        $prefix2 = (string) self::substr($str2, 0, $len);

        return self::strcmp($prefix1, $prefix2);
    }
7867
7868
    /**
     * Search a string for any of a set of characters.
     *
     * The first match of the pattern built by rxClass() from the character list is
     * located, and the haystack is returned from the first occurrence of that match on.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // byte position of the matched text inside the haystack
        $bytePosition = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $bytePosition);
    }
7890
7891
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup is attempted with mbstring, then with intl ("grapheme_strpos()", UTF-8
     * only), then with iconv, then with the native "strpos()" for pure ASCII input and
     * finally with a byte search on the haystack tail whose result is converted back
     * into a character position. A backend that reports "not found" does not end the
     * search; the next one is tried. The "CP850" and "ASCII" encodings are answered by
     * UTF8::strpos_in_byte().
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strpos_in_byte($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack. Translate it into the
        // matching absolute character position, so that the returned index refers to the
        // whole haystack and not only to the searched tail.
        $absoluteOffset = $offset;
        if ($offset < 0) {
            $absoluteOffset = (int) self::strlen($haystack, $encoding) + $offset;
            if ($absoluteOffset < 0) {
                $absoluteOffset = 0;
            }
        }

        // cut off everything in front of the offset and search the remaining tail
        if ($haystackIsAscii) {
            $haystackTmp = \substr($haystack, $offset);
        } else {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        }
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystackTail = (string) $haystackTmp;

        $pos = \strpos($haystackTail, $needle);
        if ($pos === false) {
            return false;
        }

        // "$pos" is a byte position inside the tail: convert it into a character count
        // (the empty prefix of a match at position 0 has the length 0)
        return $absoluteOffset + (int) self::strlen(\substr($haystackTail, 0, $pos), $encoding);
    }
8041
8042
    /**
     * Find the byte position of the first occurrence of a string in another string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $overloadIsActive = self::$SUPPORT['mbstring_func_overload'] === true;

        // "mb_" is available if overload is used, so call it explicitly
        // with an 8-bit charset ("CP850") instead of the plain function
        return $overloadIsActive
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
8075
8076
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * INFO: the fallbacks used without "mbstring" only search for the first
     *       character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($isByteEncoding && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        //
        // fallback via iconv, otherwise via vanilla php
        // (both only look for the first character of the needle)
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8189
8190
    /**
     * Reverses characters order in the string.
     *
     * The string is walked from its last character to its first one via
     * UTF8::substr(), one character per step.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $result = '';
        $lastIndex = (int) self::strlen($str) - 1;

        for ($index = $lastIndex; $index >= 0; --$index) {
            $char = self::substr($str, $index, 1);
            if ($char === false) {
                // skip positions that could not be extracted
                continue;
            }

            $result .= $char;
        }

        return $result;
    }
8214
8215
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * INFO: the fallback used without "mbstring" only searches for the first
     *       character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php (first character of the needle only)
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8285
8286
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        $intlIsUsable = $encoding === 'UTF-8'
                        &&
                        $offset >= 0
                        &&
                        self::$SUPPORT['intl'] === true;
        if ($intlIsUsable) {
            $intlResult = \grapheme_strripos($haystack, $needle, $offset);
            if ($intlResult !== false) {
                return $intlResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitive
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
8391
8392
    /**
     * Finds the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if haystack
     *                   or needle is an empty string
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $overloadIsActive = self::$SUPPORT['mbstring_func_overload'] === true;

        // "mb_" is available if overload is used, so call it explicitly
        // with an 8-bit charset ("CP850") instead of the plain function
        return $overloadIsActive
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
8427
8428
    /**
     * Find position of last occurrence of a string in a string.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        // INFO: $offset is typed as "int", so no extra null-check is needed here
        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // positive offset: cut off the leading characters and add the offset back to the result
        // negative offset: cut off the trailing characters, the result needs no correction
        $haystackTmp = null;
        if ($offset > 0) {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        $pos = self::strrpos_in_byte($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // convert the byte position into a character position:
        // count the characters of everything in front of the match
        $strTmp = self::substr_in_byte($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        return $offset + (int) self::strlen($strTmp, $encoding);
    }
8568
8569
    /**
     * Find the byte position of the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   haystack or needle is an empty string.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $overloadIsActive = self::$SUPPORT['mbstring_func_overload'] === true;

        // "mb_" is available if overload is used, so call it explicitly
        // with an 8-bit charset ("CP850") instead of the plain function
        return $overloadIsActive
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
8603
8604
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * If $offset or $length is given, only that part of the string (cut out via UTF8::substr()) is examined.
     *
     * @param string $str    <p>The input string.</p>
     * @param string $mask   <p>The mask of chars</p>
     * @param int    $offset [optional]
     * @param int    $length [optional]
     *
     * @return false|int
     */
    public static function strspn(string $str, string $mask, int $offset = 0, int $length = null)
    {
        $sliceIsRequested = $offset || $length !== null;
        if ($sliceIsRequested) {
            $str = (string) self::substr($str, $offset, $length);
        }

        // an empty subject or an empty mask can never match anything
        if ($mask === '' || $str === '') {
            return 0;
        }

        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        // length (in characters) of the leading run of mask characters
        return (int) self::strlen($found[0]);
    }
8629
8630
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $intlResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($intlResult !== false) {
                return $intlResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $beforeNeedle = $match[1];
        if ($before_needle) {
            return $beforeNeedle;
        }

        return self::substr($haystack, (int) self::strlen($beforeNeedle));
    }
8733
8734
    /**
     * Finds first occurrence of a string within another (byte based).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found or if haystack
     *                      or needle is an empty string
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $overloadIsActive = self::$SUPPORT['mbstring_func_overload'] === true;

        // "mb_" is available if overload is used, so call it explicitly
        // with an 8-bit charset ("CP850") instead of the plain function
        return $overloadIsActive
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
8773
8774
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through UTF8::fixStrCaseHelper() and then
     * converted to lowercase or uppercase, depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        $prepared = self::fixStrCaseHelper($str, $lower, $full);

        // only a strict "true" selects the lowercase variant
        return $lower === true
            ? self::strtolower($prepared, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($prepared, $encoding, $cleanUtf8, $lang);
    }
8812
8813
    /**
     * Make a string lowercase.
     *
     * If a language is given and "intl" is available, the conversion is done via the
     * "<lang>-Lower" transliterator (or "Any-Lower" if that id is not available);
     * in every other case "mb_strtolower()" is used.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid byte sequences before the string is handed to intl / mbstring
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Lower';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);

                // "transliterator_transliterate()" returns false on failure, which must
                // not leak through the "string" return type: use the fallback below instead
                if ($transliterated !== false) {
                    return $transliterated;
                }
            } else {
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
8874
8875
    /**
     * Make a string uppercase.
     *
     * If a language is given and "intl" is available, the conversion is done via the
     * "<lang>-Upper" transliterator (or "Any-Upper" if that id is not available);
     * in every other case "mb_strtoupper()" is used.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid byte sequences before the string is handed to intl / mbstring
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, only the language specific transliterator is missing
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);

                // "transliterator_transliterate()" returns false on failure, which must
                // not leak through the "string" return type: use the fallback below instead
                if ($transliterated !== false) {
                    return $transliterated;
                }
            } else {
                \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
8936
8937
    /**
     * Translate characters or replace sub-strings.
     *
     * - If "$to" is not empty, "$from" and "$to" are split into characters, the longer
     *   list is cut down to the length of the shorter one and every character from
     *   "$from" is translated into the character at the same position in "$to".
     * - If "$to" is empty and "$from" is a string, every occurrence of "$from" is removed.
     * - If "$to" is empty and "$from" is an array, it is used as replace-pairs for "strtr()".
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the character lists cannot be combined into replace-pairs
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // both lists need the same number of elements
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the result in its own variable, so that the error message
            // can still show the original input instead of "false"
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            return \strtr($str, $pairs);
        }

        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
8984
8985
    /**
     * Return the width of a string.
     *
     * "mb_strwidth()" is used if mbstring is available; otherwise the characters
     * matched by the wide-character pattern below count as two columns and every
     * other character counts as one column.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // preferred way: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strwidth($str, $encoding);
        }

        // vanilla php: the pattern below only works on UTF-8 input
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the wide characters and remember how many of them were found
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        // two columns per wide character + one column for each remaining character
        return ($wideCount << 1) + (int) self::strlen($narrowOnly, 'UTF-8');
    }
9035
9036
    /**
     * Get part of a string.
     *
     * The implementations are tried in this order: byte-wise (CP850 / ASCII encoding),
     * mbstring, intl, iconv, native "substr()" for ASCII-only strings and finally a
     * character-array based fallback.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str       <p>The string being checked.</p>
     * @param int    $offset    <p>The first position used in str.</p>
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p><b>FALSE</b> is returned if the
     *                      fallback needs the string length and it cannot be determined.
     */
    public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        // nothing to cut from or nothing requested
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // the whole string was requested
        if ($length === null && !$offset) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // binary || ascii only: work on bytes
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::substr_in_byte($str, $offset, $length);
        }

        // preferred way: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            $mbResult = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
            if ($mbResult !== false) {
                return $mbResult;
            }
        }

        // from here on we need the string-length and can't fake it via "2147483647"
        $strLength = ($offset || $length === null) ? self::strlen($str, $encoding) : 0;

        // e.g.: invalid chars + mbstring not installed
        if ($strLength === false) {
            return false;
        }

        // the offset points directly behind the last character
        // INFO: "$length" can't be 0 here, so only "null" is left as empty value
        if ($length === null && $offset === $strLength) {
            return '';
        }

        // the offset points behind the end of the string
        if ($offset && $offset > $strLength) {
            // "false" is the php native return type here,
            //  but we optimized this for performance ... see "2147483647" instead of "strlen"
            return '';
        }

        $length = (int) ($length ?? $strLength);

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // fallback via intl
        // INFO: "grapheme_substr()" can't handle other encodings or a negative offset
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $intlResult = \grapheme_substr($str, $offset, $length);
            if ($intlResult !== false) {
                return $intlResult;
            }
        }

        // fallback via iconv
        // INFO: "iconv_substr()" can't handle negative length
        if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
            $iconvResult = \iconv_substr($str, $offset, $length);
            if ($iconvResult !== false) {
                return $iconvResult;
            }
        }

        // fallback for ascii only
        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        // fallback via vanilla php:
        // split into an array of characters, extract the relevant part and join it again
        $chars = self::split($str);

        return \implode('', \array_slice($chars, $offset, $length));
    }
9192
9193
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * If an offset or a length is given, the first string is cut down to that part
     * and the second string is cut down to the length of that part before comparing.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
    {
        $compareOnlyPart = $offset !== 0 || $length !== null;

        if ($compareOnlyPart) {
            $str1 = (string) self::substr($str1, $offset, $length);
            // compare against the same number of characters from the second string
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1));
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2)
            : self::strcmp($str1, $str2);
    }
9228
9229
    /**
     * Count the number of substring occurrences.
     *
     * If an offset or a length is given, only that part of the haystack is searched.
     * "mb_substr_count()" is used if mbstring is available, otherwise the occurrences
     * are counted via a quoted regular expression.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack  <p>The string to search in.</p>
     * @param string $needle    <p>The substring to search for.</p>
     * @param int    $offset    [optional] <p>The offset where to start counting.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum length after the specified offset to search for the
     *                          substring. It outputs a warning if the offset plus the length is
     *                          greater than the haystack length.
     *                          </p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int this functions returns an integer or false if there isn't a string
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                // measure the haystack in the requested encoding, the same one
                // that is used for "self::substr()" below
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            // only search in the requested part of the haystack
            $haystack = (string) self::substr($haystack, $offset, $length, $encoding);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid byte sequences would falsify the result, so remove them from both strings
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
9316
9317
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * If the mbstring function overloading is active, the haystack is cut byte-wise
     * and counted via "mb_substr_count()" with an 8-bit encoding; otherwise the
     * native "substr_count()" is called directly.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string being found.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The offset where to start counting
     *                         </p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring. It outputs a warning if the offset plus the length is
     *                         greater than the haystack length.
     *                         </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
    {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            if ($offset || $length !== null) {
                if ($length === null) {
                    $lengthTmp = self::strlen($haystack);
                    if ($lengthTmp === false) {
                        return false;
                    }
                    $length = (int) $lengthTmp;
                }

                $isInvalidRange = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
                // output from "substr_count()" have changed in PHP 7.1
                if ($isInvalidRange && Bootup::is_php('7.1') === false) {
                    return false;
                }

                // INFO: a "false" result is turned into an empty string by the cast
                $haystack = (string) self::substr_in_byte($haystack, $offset, $length);
            }

            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }
9394
9395
    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $caseSensitive to false.
     *
     * INFO: a non-boolean third argument is used as encoding, to stay compatible with an older api
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
    {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // only a fallback to prevent BC in the api ...
        /** @psalm-suppress RedundantConditionGivenDocblockType */
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        if (!$caseSensitive) {
            // bring both strings into the same (uppercase) case-folded form
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

        return (int) $occurrences;
    }
9426
9427
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not start with it
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) self::substr($haystack, $prefixLength);
    }
9451
9452
    /**
9453
     * Get part of a string process in bytes.
9454
     *
9455
     * @param string $str    <p>The string being checked.</p>
9456
     * @param int    $offset <p>The first position used in str.</p>
9457
     * @param int    $length [optional] <p>The maximum length of the returned string.</p>
9458
     *
9459
     * @return false|string
9460
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
9461
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9462
     *                      characters long, <b>FALSE</b> will be returned.
9463
     */
9464
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
9465
    {
9466 51
        if ($str === '') {
9467
            return '';
9468
        }
9469
9470
        // Empty string
9471 51
        if ($length === 0) {
9472
            return '';
9473
        }
9474
9475
        // Whole string
9476 51
        if (!$offset && $length === null) {
9477
            return $str;
9478
        }
9479
9480 51
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9481
            self::checkForSupport();
9482
        }
9483
9484 51
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
9485
            // "mb_" is available if overload is used, so use it ...
9486
            return \mb_substr($str, $offset, $length ?? 2147483647, 'CP850'); // 8-BIT
9487
        }
9488
9489 51
        return \substr($str, $offset, $length ?? 2147483647);
9490
    }
9491
9492
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string the haystack without the suffix, or the unchanged haystack if it does not end with it
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) self::substr($haystack, 0, $keepLength);
    }
9516
9517
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not start with it
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) self::substr($haystack, $prefixLength);
    }
9541
9542
    /**
9543
     * Replace text within a portion of a string.
9544
     *
9545
     * source: https://gist.github.com/stemar/8287074
9546
     *
9547
     * @param string|string[] $str         <p>The input string or an array of stings.</p>
9548
     * @param string|string[] $replacement <p>The replacement string or an array of stings.</p>
9549
     * @param int|int[]       $offset      <p>
9550
     *                                     If start is positive, the replacing will begin at the start'th offset
9551
     *                                     into string.
9552
     *                                     <br><br>
9553
     *                                     If start is negative, the replacing will begin at the start'th character
9554
     *                                     from the end of string.
9555
     *                                     </p>
9556
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
9557
     *                                     portion of string which is to be replaced. If it is negative, it
9558
     *                                     represents the number of characters from the end of string at which to
9559
     *                                     stop replacing. If it is not given, then it will default to strlen(
9560
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
9561
     *                                     length is zero then this function will have the effect of inserting
9562
     *                                     replacement into string at the given start offset.</p>
9563
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
9564
     *
9565
     * @return string|string[] The result string is returned. If string is an array then array is returned.
9566
     */
9567
    public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
    {
        // Array mode: normalize every argument to a list with (at most) one entry per
        // input string and then process the strings one by one.
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries fall back to 0)
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (null entries become 0, other non-integer entries become $num)
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    if ($valueTmpV2 !== null) {
                        $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                    } else {
                        $valueTmpV2 = 0;
                    }
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call: a closure is used (instead of a plain callable), so that the
            // requested "$encoding" is handed down to every element and not silently
            // replaced by the default encoding
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // String mode: only the first entry of a replacement-array is used.
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        // For an ASCII-only input, byte offsets and character offsets are the same,
        // so the native function can be used directly.
        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into the range 0 .. $string_length
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end of the string, a missing (or too big)
            // length means "up to the end of the string"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return (string) self::substr($str, 0, $offset, $encoding) .
                   $replacement .
                   (string) self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
        }

        // Fallback without mbstring: split both strings into single characters
        // and splice the replacement into the character list.
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
9687
9688
    /**
9689
     * Removes a suffix ($needle) from the end of the string ($haystack).
9690
     *
9691
     * @param string $haystack <p>The string to search in.</p>
9692
     * @param string $needle   <p>The substring to search for.</p>
9693
     *
9694
     * @return string return the sub-string
9695
     */
9696
    public static function substr_right(string $haystack, string $needle): string
    {
        // An empty haystack stays empty and an empty needle removes nothing.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // The needle is not a suffix of the haystack: nothing to cut off.
        if (self::str_ends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // Keep everything in front of the suffix.
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) self::substr($haystack, 0, $keepLength);
    }
9712
9713
    /**
9714
     * Returns a case swapped version of the string.
9715
     *
9716
     * @param string $str       <p>The input string.</p>
9717
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9718
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9719
     *
9720
     * @return string each character's case swapped
9721
     */
9722
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        // Only encodings other than the two well-known names are normalized.
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($isKnownEncoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $lowerCased = self::strtolower($str, $encoding);
        $upperCased = self::strtoupper($str, $encoding);

        // Byte-wise XOR: wherever the input equals its lower-cased form the upper-cased
        // byte remains (and the other way around), which swaps the case.
        return (string) ($lowerCased ^ $upperCased ^ $str);
    }
9740
9741
    /**
9742
     * Checks whether symfony-polyfills are used.
9743
     *
9744
     * @return bool
9745
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
9746
     */
9747
    public static function symfony_polyfill_used(): bool
    {
        // "mb_strlen()" is defined although the "mbstring" extension is not loaded
        // -> the function must come from somewhere else (presumably a polyfill)
        if (\extension_loaded('mbstring') === false && \function_exists('mb_strlen')) {
            return true;
        }

        // same check for "iconv"
        return \extension_loaded('iconv') === false && \function_exists('iconv');
    }
9764
9765
    /**
9766
     * @param string $str
9767
     * @param int    $tabLength
9768
     *
9769
     * @return string
9770
     */
9771
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        // every tab is replaced by "$tabLength" space characters
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $spaces, $str);
    }
9775
9776
    /**
9777
     * Converts the first character of each word in the string to uppercase
9778
     * and all other chars to lowercase.
9779
     *
9780
     * @param string      $str                   <p>The input string.</p>
9781
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
9782
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
9783
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
9784
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
9785
     *
9786
     * @return string string with all characters of $str being title-cased
9787
     */
9788
    public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // Only encodings other than the two well-known names are normalized.
        $isKnownEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
        if ($isKnownEncoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The real work is delegated to "str_titleize()".
        return self::str_titleize(
            $str,
            null,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength,
            false
        );
    }
9796
9797
    /**
9798
     * alias for "UTF8::to_ascii()"
9799
     *
9800
     * @see        UTF8::to_ascii()
9801
     *
9802
     * @param string $str
9803
     * @param string $subst_chr
9804
     * @param bool   $strict
9805
     *
9806
     * @return string
9807
     *
9808
     * @deprecated <p>use "UTF8::to_ascii()"</p>
9809
     */
9810
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        // deprecated alias: forwards all arguments unchanged to "to_ascii()"
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
9814
9815
    /**
9816
     * alias for "UTF8::to_iso8859()"
9817
     *
9818
     * @see        UTF8::to_iso8859()
9819
     *
9820
     * @param string|string[] $str
9821
     *
9822
     * @return string|string[]
9823
     *
9824
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
9825
     */
9826
    public static function toIso8859($str)
    {
        // deprecated alias: forwards the argument unchanged to "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
9830
9831
    /**
9832
     * alias for "UTF8::to_latin1()"
9833
     *
9834
     * @see        UTF8::to_latin1()
9835
     *
9836
     * @param string|string[] $str
9837
     *
9838
     * @return string|string[]
9839
     *
9840
     * @deprecated <p>use "UTF8::to_latin1()"</p>
9841
     */
9842
    public static function toLatin1($str)
    {
        // deprecated alias: forwards the argument unchanged to "to_latin1()"
        $converted = self::to_latin1($str);

        return $converted;
    }
9846
9847
    /**
9848
     * alias for "UTF8::to_utf8()"
9849
     *
9850
     * @see        UTF8::to_utf8()
9851
     *
9852
     * @param string|string[] $str
9853
     *
9854
     * @return string|string[]
9855
     *
9856
     * @deprecated <p>use "UTF8::to_utf8()"</p>
9857
     */
9858
    public static function toUTF8($str)
    {
        // deprecated alias: forwards the argument unchanged to "to_utf8()"
        $converted = self::to_utf8($str);

        return $converted;
    }
9862
9863
    /**
9864
     * Convert a string into ASCII.
9865
     *
9866
     * @param string $str     <p>The input string.</p>
9867
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
9868
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
9869
     *                        performance</p>
9870
     *
9871
     * @return string
9872
     */
9873
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // transliteration tables, kept across calls and indexed by "bank" (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if ($strict === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

                // "transliterator_transliterate()" returns false on failure: in that case
                // the cleaned input is kept and handled by the table lookup below
                if (\is_string($transliterated) === true) {
                    $str = $transliterated;

                    // check again, if we only have ASCII, now ...
                    if (self::is_ascii($str) === true) {
                        return $str;
                    }
                }
            }
        }

        // byte lookup table, used below to read the numeric value of each byte of a character
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            // reset for every character, so that a code point which was decoded for a
            // previous character can never be reused by accident
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // decode the code point from the 2 - 6 byte sequence, depending on the lead byte
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // lead bytes 254 / 255 and everything that could not be decoded are unknown
            if ($ordC0 === 254 || $ordC0 === 255 || $ord === null) {
                $c = $unknown;

                continue;
            }

            // the tables are split into "banks" of 256 code points, loaded on demand
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            // position of the code point within its bank
            $newchar = $ord & 255;

            // for debugging (missing chars): $bank, $newchar, $ord and $c identify the table entry
            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {
                $c = $unknown;
            }
        }
        unset($c);

        return \implode('', $chars);
    }
10024
10025
    /**
10026
     * @param mixed $str
10027
     *
10028
     * @return bool
10029
     */
10030
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keyword = \strtolower($value);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        // any other numeric value is true, if it is greater than zero
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        // everything else: the boolean value of the trimmed string
        return (bool) self::trim($value);
    }
10064
10065
    /**
10066
     * Convert given string to safe filename (and keep string case).
10067
     *
10068
     * @param string $string
10069
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
10070
     *                                  simply removed and whitespace runs become $fallback_char.
10071
     * @param string $fallback_char
10072
     *
10073
     * @return string
10074
     */
10075
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                            // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty "$fallback_char" would build the invalid pattern "/[]+/": "preg_replace()"
        // would then fail and the whole filename would be lost, so step 3 is skipped.
        if ($fallback_char === '') {
            return (string) \preg_replace($patterns, $replacements, $string);
        }

        $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
        $replacements[] = $fallback_char;

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
10100
10101
    /**
10102
     * Convert a string into "ISO-8859"-encoding (Latin-1).
10103
     *
10104
     * @param string|string[] $str
10105
     *
10106
     * @return string|string[]
10107
     */
10108
    public static function to_iso8859($str)
    {
        // arrays are converted element by element (recursively), the keys are kept
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
10125
10126
    /**
10127
     * alias for "UTF8::to_iso8859()"
10128
     *
10129
     * @see UTF8::to_iso8859()
10130
     *
10131
     * @param string|string[] $str
10132
     *
10133
     * @return string|string[]
10134
     */
10135
    public static function to_latin1($str)
    {
        // alias: forwards the argument unchanged to "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
10139
10140
    /**
10141
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
10142
     *
10143
     * <ul>
10144
     * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
10145
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
10146
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
10147
     * case.</li>
10148
     * </ul>
10149
     *
10150
     * @param string|string[] $str                    <p>Any string or array.</p>
10151
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
10152
     *
10153
     * @return string|string[] the UTF-8 encoded string
10154
     */
10155
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays are converted element by element (recursively), the keys are kept
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $byteCount = self::strlen_in_byte($str);
        $converted = '';
        $pos = 0;

        while ($pos < $byteCount) {
            $lead = $str[$pos];

            // 0x00 - 0x7F: it doesn't need conversion
            if ($lead < "\x80") {
                $converted .= $lead;
                ++$pos;

                continue;
            }

            // number of continuation bytes a lead byte like this announces
            // (0 === stray continuation byte or a lead byte above 0xF7)
            if ($lead >= "\xC0" && $lead <= "\xDF") {
                $expectedTail = 1; // looks like 2 bytes UTF8
            } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
                $expectedTail = 2; // looks like 3 bytes UTF8
            } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
                $expectedTail = 3; // looks like 4 bytes UTF8
            } else {
                $expectedTail = 0; // doesn't look like UTF8, but should be converted
            }

            // collect the continuation bytes (0x80 - 0xBF), a missing byte counts as "\x00"
            $tail = '';
            for ($n = 1; $n <= $expectedTail; ++$n) {
                $next = $pos + $n >= $byteCount ? "\x00" : $str[$pos + $n];

                if ($next < "\x80" || $next > "\xBF") {
                    $tail = null;

                    break;
                }

                $tail .= $next;
            }

            if ($expectedTail > 0 && $tail !== null) {
                // yeah, almost sure it's UTF8 already
                $converted .= $lead . $tail;
                $pos += $expectedTail + 1;
            } else {
                // not valid UTF8 - convert it (only this single byte)
                $converted .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences
        $converted = \preg_replace_callback(
            '/\\\\u([0-9a-f]{4})/i',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match): string {
                // always fallback via symfony polyfill
                return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
            },
            $converted
        );

        if ($converted === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decodeHtmlEntityToUtf8 === true) {
            $converted = self::html_entity_decode($converted);
        }

        return $converted;
    }
10252
10253
    /**
10254
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
10255
     *
10256
     * INFO: This is slower than "trim()"
10257
     *
10258
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
10259
     * but the check for ASCII (7-Bit) costs more time than we can save here.
10260
     *
10261
     * @param string $str   <p>The string to be trimmed</p>
10262
     * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
10263
     *
10264
     * @return string the trimmed string
10265
     */
10266
    public static function trim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // No chars given (INF, null, '', false, ...): strip whitespace and control characters.
        // The string "0" is falsy in PHP but it is a valid char-list, so it is excluded here.
        $useDefaultChars = $chars === \INF || ($chars !== '0' && !$chars);

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultChars === true) {
            $pattern = '^[\pZ\pC]+|[\pZ\pC]+$';
        } else {
            // the chars are used inside of a character class, so they need to be escaped
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
10282
10283
    /**
10284
     * Makes string's first char uppercase.
10285
     *
10286
     * @param string      $str                   <p>The input string.</p>
10287
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
10288
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
10289
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
10290
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
10291
     *
10292
     * @return string the resulting string
10293
     */
10294
    public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // split into "everything after the first character" and "the first character"
        $remainder = (string) self::substr($str, 1, null, $encoding);
        $firstChar = (string) self::substr($str, 0, 1, $encoding);

        // only the first character is upper-cased, the remainder is appended unchanged
        $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $firstCharUpper . $remainder;
    }
10314
10315
    /**
10316
     * alias for "UTF8::ucfirst()"
10317
     *
10318
     * @see UTF8::ucfirst()
10319
     *
10320
     * @param string $str
10321
     * @param string $encoding
10322
     * @param bool   $cleanUtf8
10323
     *
10324
     * @return string
10325
     */
10326
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // alias: forwards all arguments unchanged to "ucfirst()"
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
10330
10331
    /**
10332
     * Uppercase for all words in the string.
10333
     *
10334
     * @param string   $str        <p>The input string.</p>
10335
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
10336
     * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new
10337
     *                             word.</p>
10338
     * @param string   $encoding   [optional] <p>Set the charset.</p>
10339
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
10340
     *
10341
     * @return string
10342
     */
10343
    public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // strict comparison: the string "0" is falsy in PHP, but it is not an empty input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // The native function can only be used without extra word-chars and without exceptions.
        // Compared strictly against '', so that e.g. a "$charlist" of "0" is not ignored.
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // skip empty parts (a part of "0" is skipped as well, it has no case anyway)
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        unset($word);

        // the parts are glued together without a separator
        return \implode('', $words);
    }
10387
10388
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>
     *                             Decode as often as possible. If false, exactly one decoding pass is applied.
     *                             </p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Rewrite "%uXXXX" escapes into numeric html-entities ("&#xXXXX;"),
        // so that the html-entity decoding below can resolve them.
        $pattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always run at least one decoding pass: previously nothing was decoded
        // at all when "$multi_decode" was false. With "$multi_decode" enabled
        // the pass is repeated until the string no longer changes.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
10440
10441
    /**
     * Returns a lookup table "urlencoded"-win1252 -> UTF-8.
     *
     * The keys are the percent-encoded bytes "%20" - "%FF", the values are the
     * characters they are replaced with.
     *
     * NOTE(review): "%7F", "%81", "%8D", "%8F", "%90", "%9D", "%A0" and "%AD"
     * map to an empty string here — confirm that this is deliberate and not the
     * result of non-printable characters that got lost.
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[]
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => '',
            // 0x80 is the euro sign in Windows-1252 (it was mapped to a backtick before).
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
10677
10678
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is first passed through str_replace(), using the keys of the
     * "win1252_to_utf8" data table as search and its values as replacement.
     * Afterwards it is walked byte by byte and compacted in place: a 2-byte
     * sequence whose code point is below 256 becomes a single byte, every other
     * multi-byte sequence becomes "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the string as it was before the byte-wise decoding is returned
     *                              whenever self::strlen() of the decoded result is not smaller than
     *                              self::strlen() of that string.
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // the search / replace lists are built once and re-used by every later call
        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $SEARCH_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $REPLACE_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        /** @noinspection PhpInternalEntityUsedInspection */
        $str = \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // remember the string, it is needed for the "$keepUtf8Chars" check at the end
        $beforeDecoding = $str;
        $byteCount = self::strlen_in_byte($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $placeholder = '?';

        // "$read" is the position of the byte that is inspected,
        // "$write" the position the decoded byte is stored at.
        /** @noinspection ForeachInvariantsInspection */
        for ($read = 0, $write = 0; $read < $byteCount; ++$read, ++$write) {
            $highBits = $str[$read] & "\xF0";

            if ($highBits === "\xC0" || $highBits === "\xD0") {
                // lead byte of a 2-byte sequence: combine it with the following byte
                $leadBits = self::$ORD[$str[$read] & "\x1F"];
                ++$read;
                $codePoint = ($leadBits << 6) | self::$ORD[$str[$read] & "\x3F"];

                if ($codePoint < 256) {
                    $str[$write] = self::$CHR[$codePoint];
                } else {
                    $str[$write] = $placeholder;
                }
            } elseif ($highBits === "\xF0" || $highBits === "\xE0") {
                // lead byte of a longer sequence: write the placeholder and
                // skip 3 ("\xF0") respectively 2 ("\xE0") further bytes
                $str[$write] = $placeholder;
                $read += $highBits === "\xF0" ? 3 : 2;
            } else {
                $str[$write] = $str[$read];
            }
        }

        // only the first "$write" bytes hold decoded data
        $decoded = self::substr_in_byte($str, 0, $write);
        if ($decoded === false) {
            $decoded = '';
        }

        if ($keepUtf8Chars !== true) {
            return $decoded;
        }

        if (self::strlen($decoded) >= (int) self::strlen($beforeDecoding)) {
            return $beforeDecoding;
        }

        return $decoded;
    }
10766
10767
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * The string is converted via "\utf8_encode()". If the result contains the
     * byte 0xC2 it is additionally passed through str_replace(), using the keys
     * of the "win1252_to_utf8" data table as search and its values as replacement.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The encoded string, or an empty string if the conversion returned false.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        // without a "\xC2" byte the table lookup below is skipped
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        // the search / replace lists are built once and re-used by every later call
        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $SEARCH_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $REPLACE_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $encoded);
    }
10807
10808
    /**
     * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The result of "UTF8::fix_simple_utf8()" for the given string.</p>
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
10821
10822
    /**
     * Returns the table of all known utf8 whitespace characters.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E. [email protected]
     *
     * @return string[]
     *                  The type of whitespace (as defined in the URL above) as key
     *                  and the whitespace character as value.
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
10837
10838
    /**
     * Limit the number of words in a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The maximum number of words; a value below 1 yields an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     *                <p>The unchanged input if it does not exceed the limit, otherwise the shortened string plus "$strAddOn".</p>
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // match the leading whitespace plus at most "$limit" words
        \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

        if (!isset($matches[0])) {
            return $str;
        }

        $head = $matches[0];

        // nothing was cut off, so there is nothing to replace
        if (self::strlen($str) === (int) self::strlen($head)) {
            return $str;
        }

        return self::rtrim($head) . $strAddOn;
    }
10869
10870
    /**
     * Wraps a string to a given number of characters.
     *
     * The native "\wordwrap()" is not applied to the text itself but to a mask
     * that holds exactly one byte per character; the break positions found in
     * the mask are then transferred back to the list of characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string if "$str" or "$break" is empty.</p>
     */
    public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
    {
        if ($str === '' || $break === '') {
            return '';
        }

        $mask = '';
        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }
        $chars = [];

        // Build "$chars" and "$mask" in parallel: every entry of "$chars" gets
        // one mask byte ('#' = existing break, ' ' = space, '?' = anything else).
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                $chars[] = $break;
                $mask .= '#';
            }

            unset($segments[$segmentIndex]);

            foreach (self::split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $wrapped = '';
        $charIndex = 0;
        $breakPos = -1;
        $maskPos = -1;
        $mask = \wordwrap($mask, $width, '#', $cut);

        // every '#' in the wrapped mask marks a position where "$break" has to be written
        while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
            // copy the characters in front of the break position
            for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex++]);
            }

            // an existing break or a space at this position is replaced by the break
            if ($break === $chars[$charIndex] || $chars[$charIndex] === ' ') {
                unset($chars[$charIndex++]);
            }

            $wrapped .= $break;
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
10935
10936
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split at "\r\n", "\r" and "\n"; every line is wrapped on its
     * own and terminated with "\n", so the result always ends with "\n".
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width used for every single line.</p>
     *
     * @return string
     *                <p>The wrapped lines, or an empty string if the input could not be split.</p>
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $string = '';
        foreach ($lines as $line) {
            // Use the wrapper of this class instead of the native "\wordwrap()":
            // the native function measures the width in bytes and therefore
            // wraps lines containing multi-byte characters too early.
            $string .= self::wordwrap($line, $limit);
            $string .= "\n";
        }

        return $string;
    }
10960
10961
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @return string[] the numeric code point as key and the White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
10970
10971
    /**
     * Pads the given string on the left and / or on the right side.
     * The default character used is a space.
     *
     * The padding itself is done by "UTF8::str_pad()"; this helper only computes
     * the target length and selects the pad type.
     *
     * @param string $str
     * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
     * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
     * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        $strlen = (int) self::strlen($str, $encoding);

        // a side that is not padded contributes 0, so one formula covers all cases
        $length = ($left + $right) + $strlen;

        $type = \STR_PAD_BOTH;
        if ($left && !$right) {
            $type = \STR_PAD_LEFT;
        } elseif ($right && !$left) {
            $type = \STR_PAD_RIGHT;
        }

        return self::str_pad($str, $length, $padStr, $type, $encoding);
    }
11003
11004
    /**
     * Replaces characters via the case-folding tables: first the common table
     * ("self::$COMMON_CASE_FOLD"), then - on request - the "caseFolding_full" data table.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $upperChars = self::$COMMON_CASE_FOLD['upper'];
        $lowerChars = self::$COMMON_CASE_FOLD['lower'];

        // only a strict "true" selects the lowercase direction
        $toLower = $useLower === true;

        if ($toLower) {
            $search = $upperChars;
            $replace = $lowerChars;
        } else {
            $search = $lowerChars;
            $replace = $upperChars;
        }

        $str = (string) \str_replace($search, $replace, $str);

        if ($fullCaseFold) {
            // the data file is loaded once and re-used by every later call
            static $FULL_CASE_FOLD = null;
            if ($FULL_CASE_FOLD === null) {
                $FULL_CASE_FOLD = self::getData('caseFolding_full');
            }

            if ($toLower) {
                $fullSearch = $FULL_CASE_FOLD[0];
                $fullReplace = $FULL_CASE_FOLD[1];
            } else {
                $fullSearch = $FULL_CASE_FOLD[1];
                $fullReplace = $FULL_CASE_FOLD[0];
            }

            $str = (string) \str_replace($fullSearch, $fullReplace, $str);
        }

        return $str;
    }
11045
11046
    /**
     * Includes "/data/<file>.php" (relative to this file) and returns its result.
     *
     * @param string $file <p>The file name without directory and without ".php".</p>
     *
     * @return mixed
     *               <p>Whatever the included file returns.</p>
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
11059
11060
    /**
     * Includes "/data/<file>.php" (relative to this file) if that file exists.
     *
     * @param string $file <p>The file name without directory and without ".php".</p>
     *
     * @return false|mixed the result of the included file, false if the file does not exist
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        return include $path;
    }
11077
11078
    /**
     * Checks whether the mbstring "overloaded" mode for string functions is active on the server.
     *
     * @return bool
     *              <p>
     *              true if the constant "MB_OVERLOAD_STRING" is defined and its bit
     *              is set in the INI directive "mbstring.func_overload".
     *              </p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing that could be checked
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $funcOverload = (int) @\ini_get('mbstring.func_overload');

        return ($funcOverload & \MB_OVERLOAD_STRING) !== 0;
    }
11095
11096
    /**
     * Filters a list of strings and returns the remaining values re-indexed.
     *
     * @param array    $strings
     * @param bool     $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
     * @param int|null $removeShortValues <p>
     *                                    Drop values whose "UTF8::strlen()" is less than or equal to
     *                                    this number; null disables the check.
     *                                    </p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $checkLength = $removeShortValues !== null;
        $checkEmpty = $removeEmptyValues === true;

        $kept = [];
        foreach ($strings as $value) {
            if ($checkLength && self::strlen($value) <= $removeShortValues) {
                continue;
            }

            if ($checkEmpty && \trim($value) === '') {
                continue;
            }

            $kept[] = $value;
        }

        return $kept;
    }
11130
11131
    /**
     * Builds a regular-expression fragment from the characters of "$s".
     *
     * The characters are collected in one bracket expression ("[...]"); if
     * "UTF8::str_split()" returns elements that can not be put into it, the
     * result is a non-capturing group of alternatives instead. The result is
     * cached per combination of "$s" and "$class".
     *
     * @param string $s     <p>The characters that should be matched.</p>
     * @param string $class <p>Content that is put in front of the bracket expression.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        // index 0 holds the content of the bracket expression, further entries are alternatives
        $classParts = [$class];

        // Iterate by value and with a variable of its own: the result of a
        // function call is no valid reference target and the parameter "$s"
        // should not be overwritten while it is split.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen goes to the very front of the bracket expression
                $classParts[0] = '-' . $classParts[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long
                $classParts[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // NOTE(review): presumably one character that is encoded with three or more bytes
                $classParts[0] .= $char;
            } else {
                $classParts[] = $char;
            }
        }

        if ($classParts[0]) {
            $classParts[0] = '[' . $classParts[0] . ']';
        }

        if (\count($classParts) === 1) {
            $return = $classParts[0];
        } else {
            $return = '(?:' . \implode('|', $classParts) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
11179
11180
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every part via
     * str_upper_first(), except for parts that
     * - are exactly one of the special names (e.g. "von", "de", "ibn"), or
     * - start with one of the special prefixes (e.g. "mc", "d'", "al-"), or
     * - start with one of the special names, if the delimiter is "-".
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  <p>The encoding that is passed on to strpos().</p>
     *
     * @return string <p>The re-joined names, or an empty string if $delimiter is empty.</p>
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // explode() cannot split by an empty delimiter: it returns false (plus a warning)
        // before PHP 8 and throws a ValueError since PHP 8, so handle that case up front.
        if ($delimiter === '') {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched: for the "-" delimiter the special
        // names count as well, the prefixes always do.
        $protectedBeginnings = $delimiter === '-'
            ? \array_merge($specialCases['names'], $specialCases['prefixes'])
            : $specialCases['prefixes'];

        $namesArray = \explode($delimiter, $names);

        foreach ($namesArray as &$name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            foreach ($protectedBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // One match is enough: leave this part as it is and go on with the next one.
                    continue 2;
                }
            }

            $name = self::str_upper_first($name);
        }
        // Break the reference, so that $name can't modify the last array element anymore.
        unset($name);

        return \implode($delimiter, $namesArray);
    }
11270
11271
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and afterwards all
     * non-spacing marks (\p{Mn}) are removed from it.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>The transformed string, an empty string if the normalization
     *                     failed or null if preg_replace() reported an error.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() returns false on failure; with strict types that value must not
        // be passed on to preg_replace(), which expects a string subject.
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
11283
11284
    /**
     * Convert one input value into its UTF-8 representation.
     *
     * The value found for $input in the "ord" table is looked up in the
     * "win1252_to_utf8" table first. If there is no entry for it, two bytes are built instead:
     * (CHR[ord / 64] | 0xC0) followed by (($input & 0x3F) | 0x80).
     *
     * The "ord", "chr" and "win1252_to_utf8" tables are loaded via getData() on first use.
     *
     * NOTE(review): presumably $input is a single byte (a key of the "ord" table) - confirm against the callers.
     *
     * @param int|string $input
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // "/" yields a float for most values; truncate it explicitly instead of relying
            // on the implicit float-to-int conversion of array keys (deprecated since PHP 8.1).
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // Bitwise operators on strings are intended here: they work on the raw bytes.
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
11317
}
11318