Passed
Push — master ( 74eb37...56737a )
by Lars
03:35
created

UTF8::remove_right()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 5
nc 2
nop 3
dl 0
loc 11
ccs 6
cts 6
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
14
     * Bom => Byte-Length
15
     *
16
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
17
     *
18
     * @var array
19
     */
20
    private static $BOM = [
21
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
22
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
23
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
24
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
25
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
26
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
27
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
28
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
29
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
30
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
31
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $UTF8_MSWORD;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $BROKEN_UTF8_FIX;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $WIN1252_TO_UTF8;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $CHR;
219
220
    /**
     * Creates an instance and triggers the environment support detection.
     *
     * The detection itself is performed by UTF8::checkForSupport().
     */
    public function __construct()
    {
        self::checkForSupport();
    }
227
228
    /**
     * Return the character at the given position ($str[1]-like access for UTF-8 strings).
     *
     * An empty input string or a negative position yields an empty string;
     * otherwise the lookup is delegated to UTF8::substr() with a length of 1.
     *
     * @param string $str <p>A UTF-8 string.</p>
     * @param int    $pos <p>The position of the character to return.</p>
     *
     * @return string single multi-byte character
     */
    public static function access(string $str, int $pos): string
    {
        // nothing to look up in an empty string or before its start
        if ($str === '' || $pos < 0) {
            return '';
        }

        $char = self::substr($str, $pos, 1);

        return (string) $char;
    }
248
249
    /**
     * Prepends the UTF-8 BOM to the string and returns the result.
     *
     * INFO: If UTF8::string_has_bom() does not report false for the input,
     *       the input string is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            // a BOM is already present
            return $str;
        }

        return self::bom() . $str;
    }
266
267
    /**
     * Changes the case of all keys in an array.
     *
     * Keys are converted via UTF8::strtolower() or UTF8::strtoupper(). If two
     * keys end up identical after conversion, the later entry overwrites the
     * earlier one. The values are copied unchanged.
     *
     * @param array $array <p>The array to work on</p>
     * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                     or <strong>CASE_LOWER</strong> (default); any other value
     *                     falls back to <strong>CASE_LOWER</strong></p>
     *
     * @return array an array with its keys lower or uppercased
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER): array
    {
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // Iterate by value: the values are only read, and a by-reference loop
        // would leave a dangling reference behind after the loop.
        foreach ($array as $key => $value) {
            // Array keys may be integers; cast so the case helpers always
            // receive a string (this file declares strict_types).
            $key = (string) $key;

            $key = $case === \CASE_LOWER
                ? self::strtolower($key)
                : self::strtoupper($key);

            $return[$key] = $value;
        }

        return $return;
    }
297
298
    /**
     * Returns the substring located between $start and $end.
     *
     * An empty string is returned when $start is not found, when $end is not
     * found after $start, or when $end directly follows $start. An optional
     * offset may be supplied from which to begin the search for $start.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
    {
        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        // first character after the start delimiter
        $contentPos = $startPos + (int) self::strlen($start, $encoding);

        $endPos = self::strpos($str, $end, $contentPos, $encoding);
        if ($endPos === false || $endPos === $contentPos) {
            // no end delimiter, or nothing between the two delimiters
            return '';
        }

        $length = $endPos - $contentPos;

        return (string) self::substr($str, $contentPos, $length, $encoding);
    }
330
331
    /**
     * Convert a binary representation ("0" / "1" digits) into a string.
     *
     * The digits are converted to hexadecimal with base_convert() and then
     * packed into bytes. An input without a first element/character, or one
     * whose numeric value is zero, yields an empty string.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $hex = \base_convert($bin, 2, 16);
        if ($hex === '0') {
            return '';
        }

        // base_convert() drops leading zeros, so the hex string can have an odd
        // number of digits (e.g. "1010" => "a"). pack('H*') fills a missing
        // nibble at the END ("a" => 0xA0), which would corrupt the value;
        // restore the leading zero nibble instead ("0a" => 0x0A).
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
351
352
    /**
     * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
363
364
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
378
379
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * The lookup is delegated to UTF8::substr() with a length of 1.
     *
     * @param string $str
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
392
393
    /**
     * Returns an array consisting of the characters in the string.
     *
     * Implemented as UTF8::str_split() with a chunk length of 1.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str, 1);

        return $characters;
    }
404
405
    /**
     * Auto-detects which UTF-8 related features the server environment offers.
     *
     * The results are stored in self::$SUPPORT. The detection runs only once:
     * later calls return immediately because the "already checked" flag is set.
     *
     * INFO: You don't need to run it manually, it will be triggered if it's needed.
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // the transliterator id list is only available with intl; otherwise keep it empty
        $hasTransliterator = self::$SUPPORT['intl'] === true
                             &&
                             \function_exists('transliterator_list_ids') === true;
        /** @noinspection PhpComposerExtensionStubsInspection */
        self::$SUPPORT['intl__transliterator_list_ids'] = $hasTransliterator
            ? \transliterator_list_ids()
            : [];

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    }
453
454
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoized per "code point + encoding" in a function-static cache.
     * Code points up to 127 are read from the lookup table loaded via
     * UTF8::getData('chr'); higher code points use IntlChar::chr() when the
     * "intlChar" support flag is set, otherwise the UTF-8 bytes are assembled
     * from the same lookup table. For any encoding other than UTF-8 the result
     * is passed through UTF8::encode().
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // memo of already generated characters
        static $CHAR_CACHE = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $memoKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$memoKey])) {
            return $CHAR_CACHE[$memoKey];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                $table = self::getData('chr');
                if ($table) {
                    self::$CHR = (array) $table;
                }
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $result = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $result = \IntlChar::chr($code_point);
        } else {
            // fallback: assemble the UTF-8 byte sequence from the lookup table

            if (self::$CHR === null) {
                $table = self::getData('chr');
                if ($table) {
                    self::$CHR = (array) $table;
                }
            }

            $code_point = (int) $code_point;
            if ($code_point <= 0x7F) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $result = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                // two bytes: 110xxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $result = self::$CHR[($code_point >> 6) + 0xC0] .
                          self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $result = self::$CHR[($code_point >> 12) + 0xE0] .
                          self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                          self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $result = self::$CHR[($code_point >> 18) + 0xF0] .
                          self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                          self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                          self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        if ($encoding !== 'UTF-8') {
            $result = self::encode($encoding, $result);
        }

        return $CHAR_CACHE[$memoKey] = $result;
    }
568
569
    /**
     * Applies a callback to every character of a string.
     *
     * The string is split with UTF8::split() and the callback is run on each
     * resulting element via array_map().
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        return \array_map($callback, self::split($str));
    }
583
584
    /**
     * Generates an array with the byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * When the "mbstring_func_overload" support flag is set, the lengths are
     * measured with UTF8::strlen_in_byte() instead of the native strlen().
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $characters = self::split($str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // pick the byte-length function depending on the mbstring overload flag
        $byteLength = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                return self::strlen_in_byte($data);
            }
            : '\strlen';

        return \array_map($byteLength, $characters);
    }
619
620
    /**
     * Get the decimal code representation of a specific character.
     *
     * The lead byte decides how many bytes belong to the character; the
     * payload bits of the lead byte and of each following byte are then
     * combined, six bits per following byte.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $code = self::ord($char[0]);

        if (!($code & 0x80)) {
            // 0xxxxxxx: single byte, the value is the code point itself
            return $code;
        }

        // number of bytes in the sequence; stays 1 if no lead-byte pattern matches
        $length = 1;

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $code &= ~0xf0;
        }

        // append the payload of every following byte (10xxxxxx)
        for ($offset = 1; $offset < $length; ++$offset) {
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
658
659
    /**
     * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty input returns an empty string. The literal entity "&#0;" is
     * treated as an empty character before it is passed to UTF8::ord().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $input = $char === '&#0;' ? '' : (string) $char;

        $codePoint = self::ord($input);

        return self::int_to_hex($codePoint, $pfix);
    }
679
680
    /**
     * Alias for "UTF8::chr_to_decimal()": forwards the argument unchanged.
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
693
694
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * The chunks come from UTF8::split(); $end is placed between the chunks
     * (it is the glue passed to implode()).
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::split($body, $chunklen);

        return \implode($end, $chunks);
    }
707
708
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in this fixed order after the byte-level clean-up:
     * diamond question mark, invisible characters, whitespace, MS Word chars, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 captures runs of well-formed sequences; groups 2 and 3 match
        // stray bytes, which are dropped because only "$1" is kept below
        $utf8Pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $str = (string) \preg_replace($utf8Pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str, '');
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
775
776
    /**
     * Clean-up a string so that only printable UTF-8 chars remain + fix UTF-8 encoding.
     *
     * The input is cast to string, repaired with UTF8::fix_simple_utf8() and
     * then passed to UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // remove all none UTF-8 symbols
        // && remove diamond question mark (�)
        // && remove invisible characters (e.g. "\0")
        // && remove BOM
        // && normalize whitespace chars (but keep non-breaking-spaces)
        return self::clean($fixed, true, true, false, true, true, true);
    }
810
811
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string argument is first split with UTF8::split(); every element is
     * then converted with UTF8::ord() and, for $u_style, with UTF8::int_to_hex().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::split($arg) : $arg;

        $points = \array_map([self::class, 'ord'], $chars);
        if ($points === []) {
            return [];
        }

        if (!$u_style) {
            return $points;
        }

        return \array_map([self::class, 'int_to_hex'], $points);
    }
855
856
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space.
     *
     * Every run matching "[[:space:]]+" is replaced via UTF8::regex_replace()
     * and the result is passed through UTF8::trim().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

        return self::trim($condensed);
    }
871
872
    /**
     * Returns the count of each character used in a string.
     *
     * The string is split into single characters with UTF8::split() and the
     * occurrences are tallied with array_count_values().
     *
     * @param string $str       <p>The input string.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(string $str, bool $cleanUtf8 = false): array
    {
        $characters = self::split($str, 1, $cleanUtf8);

        return \array_count_values($characters);
    }
885
886
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern below is replaced
     * with an empty string; a failed preg_replace() (null) becomes ''.
     *
     * @param string $str
     *
     * @return string
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
901
902
    /**
     * Tells whether the "ctype" extension is loaded in the running PHP.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isAvailable = \extension_loaded('ctype');

        return $isAvailable;
    }
912
913
    /**
     * Converts a int-value into an UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * resolved by self::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $flags = \ENT_QUOTES | \ENT_HTML5;
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, $flags);
    }
924
925
    /**
     * Decodes a MIME header field.
     *
     * iconv_mime_decode() is used when iconv is flagged as available in
     * self::$SUPPORT; otherwise mb_decode_mimeheader() serves as fallback.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, $encoding = 'UTF-8')
    {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['iconv'] !== true) {
            // NOTE(review): the input is re-encoded *before* it is decoded on
            // this fallback path - confirm that this order is intended.
            $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

            return \mb_decode_mimeheader($input);
        }

        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
955
956
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
     *        so you can call this function also on a UTF-8 String and you don't mess the string.
     *
     * Besides real charsets the pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES"
     * are handled, both as target and as source.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>Force the new encoding (we try to fix broken / double
     *                                       encoding for UTF-8)<br> otherwise we auto-detect the current
     *                                       string-encoding</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the input can not be JSON-encoded, or if it is
     *                                   declared as "BASE64" but is not valid base64
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
    {
        // nothing to convert
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // source and target are explicitly the same
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // --- pseudo encodings -------------------------------------------------

        if ($toEncoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            // In strict mode base64_decode() returns false for characters outside
            // the base64 alphabet; do not let that false leak into $str.
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for base64_decode().');
            }

            $str = $decoded;
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // --- detect the source encoding ---------------------------------------

        $fromEncodingDetected = false;
        if (
            $autodetectFromEncoding === true
            ||
            !$fromEncoding
        ) {
            $fromEncodingDetected = self::str_detect_encoding($str);
        }

        if ($fromEncodingDetected !== false) {
            // a detected encoding wins over the one passed by the caller
            $fromEncoding = $fromEncodingDetected;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$fromEncoding
            ||
            $fromEncoding === $toEncoding
        ) {
            return $str;
        }

        // --- shortcuts for the most common conversions ------------------------

        if (
            $toEncoding === 'UTF-8'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion -----------------------------------------------

        if (
            $toEncoding !== 'UTF-8'
            &&
            $toEncoding !== 'ISO-8859-1'
            &&
            $toEncoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $strEncoded = \mb_convert_encoding(
                $str,
                $toEncoding,
                $fromEncoding
            );

            // Explicit check: a converted "0" is a valid result, only false / ''
            // fall through to the iconv attempt below.
            if ($strEncoded !== false && $strEncoded !== '') {
                return $strEncoded;
            }
        }

        $return = \iconv($fromEncoding, $toEncoding, $str);
        if ($return !== false) {
            return $return;
        }

        // best effort: hand back the unconverted input
        return $str;
    }
1112
1113
    /**
     * Encodes a string as MIME header field value via iconv_mime_encode().
     *
     * The field name handed to iconv_mime_encode() is always the empty string.
     *
     * @param string $str              <p>The value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // these two names are used as they are, everything else gets normalized
        $passThrough = ['UTF-8', 'CP850'];

        if (!\in_array($fromCharset, $passThrough, true)) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (!\in_array($toCharset, $passThrough, true)) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1153
1154
    /**
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
     *
     * Without a search-string the extract is taken from the start of the text. Cuts
     * are placed on a space or a full stop, and skipped text is marked with
     * $replacerForSkippedText.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // characters that are stripped from the cut edges of the extract
        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // Compare against '' instead of using empty(): "0" is a valid search term.
        if ($search === '') {
            if ($length > 0) {
                $stringLength = (int) self::strlen($str, $encoding);
                $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
            } else {
                $end = 0;
            }

            // NOTE(review): presumably self::strpos() reports "not found" as false
            // (like \strpos); min() then yields false and $pos becomes 0 - verify.
            $pos = (int) \min(
                self::strpos($str, ' ', $end, $encoding),
                self::strpos($str, '.', $end, $encoding)
            );

            if ($pos) {
                $strSub = self::substr($str, 0, $pos, $encoding);
                if ($strSub === false) {
                    return '';
                }

                return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
            }

            return $str;
        }

        // position of the match and the point half an extract-length before its middle
        $wordPos = (int) self::stripos($str, $search, 0, $encoding);
        $halfSide = (int) ($wordPos - $length / 2 + (int) self::strlen($search, $encoding) / 2);

        // move the start back to the last space / full stop before that point
        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = self::substr($str, 0, $halfSide, $encoding);
            if ($halfText !== false) {
                $pos_start = (int) \max(
                    self::strrpos($halfText, ' ', 0, $encoding),
                    self::strrpos($halfText, '.', 0, $encoding)
                );
            }
        }

        if ($wordPos && $halfSide > 0) {
            // the extract starts somewhere inside the text
            $offset = $pos_start + $length - 1;
            $realLength = (int) self::strlen($str, $encoding);

            if ($offset > $realLength) {
                $offset = $realLength;
            }

            $pos_end = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                ) - $pos_start;

            if (!$pos_end || $pos_end <= 0) {
                // no usable end position: take everything up to the end of the text
                // (the length is measured in the same $encoding as the substr() call)
                $strSub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
                } else {
                    $extract = '';
                }
            } else {
                $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            }
        } else {
            // the extract starts at the beginning of the text
            $offset = $length - 1;
            $trueLength = (int) self::strlen($str, $encoding);

            if ($offset > $trueLength) {
                $offset = $trueLength;
            }

            $pos_end = \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );

            if ($pos_end) {
                $strSub = self::substr($str, 0, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1271
1272
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Prior to PHP 5, this parameter is called
     *                                        use_include_path and is a bool.
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
     *                                        to trigger include path
     *                                        search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If you don't need to use a
     *                                        custom context, you can skip this parameter by null.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts.
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout. It is only applied (as
     *                                        "http" context option) when no $context is passed.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // init
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);

        // filter_var() signals a failed filter with false; never hand that to
        // \file_get_contents(), report the failure like every other read error.
        if ($filename === false) {
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        if (\is_int($maxLength) === true) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 === true) {
            // binary data that is neither UTF-16 nor UTF-32 is left untouched
            $isPlainBinary = self::is_binary($data, true) === true
                             &&
                             self::is_utf16($data, false) === false
                             &&
                             self::is_utf32($data, false) === false;

            if (!$isPlainBinary) {
                $data = self::encode('UTF-8', $data, false, $fromEncoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1365
1366
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * The whole file is read with the native file_get_contents() and the result
     * is handed to self::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1385
1386
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively, strings are normalized, every
     * other type is returned as it is.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>The form passed on to \Normalizer.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which
     *                                   starts with a combining mark.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        // containers: filter every element / property in place
        if ($type === 'array' || $type === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        /** @noinspection PhpUndefinedClassInspection */
        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // any non-empty marker: "the normalization did not fail"
            $normalized = '-';
        } else {
            /** @noinspection PhpUndefinedClassInspection */
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // an unusable normalizer result triggers the re-encoding to UTF-8
            $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
                                   &&
                                   isset($normalized[0], $leading_combining[0])
                                   &&
                                   \preg_match('/^\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1452
1453
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it.
     * $options is only forwarded to the native function when it was passed explicitly.
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(int $type, string $variable_name, int $filter = \FILTER_DEFAULT, $options = null)
    {
        $var = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($var);
    }
1491
1492
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them. $definition and
     * $add_empty are only forwarded when at least $definition was passed explicitly.
     *
     * @see  http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are filter which specifies the
     *                          filter type,
     *                          flags which specifies any flags that apply to the
     *                          filter, and options which specifies any options that
     *                          apply to the filter.
     *                          </p>
     *                          <p>
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
     *                          input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
1537
1538
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter. $options is only forwarded to
     * the native function when it was passed explicitly.
     *
     * @see  http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply. The
     *                        manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags. If filter
     *                        accepts options, flags can be provided in "flags" field of array. For
     *                        the "callback" filter, callable type should be passed. The
     *                        callback must accept one argument, the value to be filtered, and return
     *                        the value after filtering/sanitizing it.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // for filters that accept options, use this format
     *                        $options = array(
     *                            'options' => array(
     *                                'default' => 3, // value to return if the filter fails
     *                                // other options here
     *                                'min_range' => 0
     *                            ),
     *                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                        // for filter that only accept flags, you can pass them directly
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                        // for filter that only accept flags, you can also pass as an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                        // callback validate filter
     *                        function foo($value)
     *                        {
     *                            // Expected format: Surname, GivenNames
     *                            if (strpos($value, ", ") === false) return false;
     *                            list($surname, $givennames) = explode(", ", $value, 2);
     *                            $empty = (empty($surname) || empty($givennames));
     *                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                            if ($empty || $notstrings) {
     *                                return false;
     *                            } else {
     *                                return $value;
     *                            }
     *                        }
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1606
1607
    /**
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them.
     *
     * @see  http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string containing a
     *                          variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" (the filter type), "flags" (any flags that
     *                          apply to the filter) and "options" (any options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all
     *                          values in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on
     *               failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               variable is not set
     */
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // Only hand the optional arguments to the native function when the caller
        // passed at least a definition; otherwise let PHP apply its own defaults.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        // The filtered result is passed through self::filter() before it is returned.
        return self::filter($filtered);
    }
1650
1651
    /**
     * Tells whether the "finfo" class exists in the current PHP runtime.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1661
1662
    /**
     * Returns the leading $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                An empty string if $n is zero or negative
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // A non-positive count never yields any characters.
        return $n > 0
            ? (string) self::substr($str, 0, $n, $encoding)
            : '';
    }
1679
1680
    /**
     * Check that the string does not contain more unicode characters than the given limit.
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @return bool true if string is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1692
1693
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        // Search / replace lists, built once from the "utf8_fix" data on first use.
        static $brokenSequences = null;
        static $fixedSequences = null;

        if ($str === '') {
            return '';
        }

        if ($brokenSequences === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
            $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($brokenSequences, $fixedSequences, $str);
    }
1726
1727
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * Arrays are processed element by element (recursively); a string is decoded and
     * re-encoded again and again until one more round no longer changes it.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as &$item) {
                $item = self::fix_utf8($item);
            }
            // drop the dangling reference left behind by the by-reference loop
            unset($item);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;

        // Starting with an empty "previous" value means an empty input is returned untouched.
        for ($previous = ''; $previous !== $current;) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1769
1770
    /**
     * Get the text direction of a specific character.
     *
     * "IntlChar" is asked first when it is available; if it is missing, or reports a
     * direction that is neither plainly LTR nor RTL, a built-in code point table decides.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltrCodes = [0, 11, 12, 20];
            $rtlCodes = [1, 13, 14, 15, 21];

            if (\in_array($intlDirection, $ltrCodes, true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, $rtlCodes, true)) {
                return 'RTL';
            }

            // every other direction code falls through to the table lookup below
        }

        $c = static::chr_to_decimal($char);

        // No RTL entry of the tables below lies outside of this window.
        if ($c < 0x5be || $c > 0x10b7f) {
            return 'LTR';
        }

        // Individual RTL code points.
        $rtlSingles = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710,
            0x7b1,
            0x7fa,
            0x81a, 0x824, 0x828,
            0x85e,
            0x200f,
            0xfb1d,
            0xfb3e,
            0x10808,
            0x1083c,
            0x1093f,
            0x10a00,
        ];

        if (\in_array($c, $rtlSingles, true)) {
            return 'RTL';
        }

        // Inclusive RTL code point ranges: [first, last].
        $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
1887
1888
    /**
     * Check for php-support.
     *
     * Runs the support detection first if it has not been done yet.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($key === null) {
            return self::$SUPPORT;
        }

        // unknown (or null-valued) entries are reported as null
        return self::$SUPPORT[$key] ?? null;
    }
1914
1915
    /**
     * Guess the file type from the first two bytes of the given content.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        $leadingBytes = self::substr_in_byte($str, 0, 2);
        if ($leadingBytes === false || self::strlen_in_byte($leadingBytes) !== 2) {
            return $fallback;
        }

        // The type code is the decimal value of byte one directly followed by the
        // decimal value of byte two, e.g. "PK" => 80 . 75 => 8075.
        $byteValues = \unpack('C2chars', $leadingBytes);
        $type_code = (int) ($byteValues['chars1'] . $byteValues['chars2']);

        // type code => [extension, mime type]
        $knownSignatures = [
            3780   => ['pdf', 'application/pdf'],
            7790   => ['exe', 'application/octet-stream'],
            7784   => ['midi', 'audio/x-midi'],
            8075   => ['zip', 'application/zip'],
            8297   => ['rar', 'application/rar'],
            255216 => ['jpg', 'image/jpeg'],
            7173   => ['gif', 'image/gif'],
            6677   => ['bmp', 'image/bmp'],
            13780  => ['png', 'image/png'],
        ];

        if (!isset($knownSignatures[$type_code])) {
            return $fallback;
        }

        return [
            'ext'  => $knownSignatures[$type_code][0],
            'mime' => $knownSignatures[$type_code][1],
            'type' => 'binary',
        ];
    }
2013
2014
    /**
     * Build a random string of $length characters picked from $possibleChars.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                An empty string if $possibleChars contains no characters
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        $alphabetSize = (int) self::strlen($possibleChars, $encoding);
        if ($alphabetSize === 0) {
            return '';
        }

        $result = '';
        $lastIndex = $alphabetSize - 1;

        // Only a successfully extracted character counts towards the requested length.
        for ($picked = 0; $picked < $length;) {
            try {
                $position = \random_int(0, $lastIndex);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $lastIndex);
            }

            $char = self::substr($possibleChars, $position, 1, $encoding);
            if ($char === false) {
                continue;
            }

            $result .= $char;
            ++$picked;
        }

        return $result;
    }
2049
2050
    /**
     * Create a unique identifier from a random number, the session id, the remote / server
     * address (when present in $_SERVER) and the optional extra entropy.
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        $seedParts = [
            \random_int(0, \mt_getrandmax()),
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $entropyExtra,
        ];
        $uniqueHelper = \implode('', $seedParts);

        // the seed is used as the prefix of the generated id
        $uniqueString = \uniqid($uniqueHelper, true);

        return $md5
            ? \md5($uniqueString . $uniqueHelper)
            : $uniqueString;
    }
2072
2073
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @see        UTF8::string_has_bom()
     *
     * @param string $str
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2088
2089
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        // anything, followed by one lower case character
        $lowercasePattern = '.*[[:lower:]]';

        return self::str_matches_pattern($str, $lowercasePattern);
    }
2100
2101
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        // anything, followed by one upper case character
        $uppercasePattern = '.*[[:upper:]]';

        return self::str_matches_pattern($str, $uppercasePattern);
    }
2112
2113
    /**
     * Converts a hexadecimal-value into an UTF-8 character.
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // hex => decimal code point => character
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2124
2125
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted forms (case-insensitive): "\uXXXX", "U+XXXX" or the bare hex digits,
     * each with four to six hexadecimal digits.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hex digits are allowed: with a wider character class, input like
        // "12g4" would slip through and intval() would silently stop at the first
        // invalid digit instead of this method reporting a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2149
2150
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @see UTF8::html_entity_decode()
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2165
2166
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Start above the ASCII range when ASCII chars should be kept as they are.
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            return \mb_encode_numericentity(
                $str,
                // convmap = [start, end, offset, mask]: the number of elements has to be
                // a multiple of four, otherwise PHP >= 8.0 throws a ValueError.
                [$startCode, 0xfffff, 0, 0xfffff],
                $encoding
            );
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::split($str)
            )
        );
    }
2219
2220
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so we are left with our own little solution here. Bummer.
     *
     * Convert all HTML entities to their applicable characters. The decoding is
     * repeated until a further pass no longer changes the string, so multiple
     * times encoded entities are resolved as well.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string $str      <p>
     *                         The input string.
     *                         </p>
     * @param int    $flags    [optional] <p>
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
     *                         which document type to use. If omitted (null), ENT_QUOTES | ENT_HTML5 is used.
     *                         <table>
     *                         Available <i>flags</i> constants
     *                         <tr valign="top">
     *                         <td>Constant Name</td>
     *                         <td>Description</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_COMPAT</b></td>
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_QUOTES</b></td>
     *                         <td>Will convert both double and single quotes.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_NOQUOTES</b></td>
     *                         <td>Will leave both double and single quotes unconverted.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_HTML401</b></td>
     *                         <td>Handle code as HTML 4.01.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_XML1</b></td>
     *                         <td>Handle code as XML 1.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_XHTML</b></td>
     *                         <td>Handle code as XHTML.</td>
     *                         </tr>
     *                         <tr valign="top">
     *                         <td><b>ENT_HTML5</b></td>
     *                         <td>Handle code as HTML 5.</td>
     *                         </tr>
     *                         </table>
     *                         </p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // too short to hold an entity that is worth decoding, examples: &; || &x;
        if (!isset($str[3])) {
            return $str;
        }

        // nothing to do without "&", or with neither "&#" nor ";" in the string
        if (
            \strpos($str, '&') === false
            ||
            (
                \strpos($str, '&#') === false
                &&
                \strpos($str, ';') === false
            )
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // The support information has to be available before it is read below.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // only a warning: the decoding below is attempted anyway
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                $str = \mb_decode_numericentity(
                    $str,
                    // convmap = [start, end, offset, mask]: the number of elements has to be
                    // a multiple of four, otherwise PHP >= 8.0 throws a ValueError.
                    [0x80, 0xfffff, 0, 0xfffff],
                    $encoding
                );
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        // always fallback via symfony polyfill
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                        // quotes stay encoded here, html_entity_decode() + $flags decide about them
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            // decode numeric & UTF16 two byte entities
            // (the inner replace appends the missing ";" to numeric entities first)
            $str = \html_entity_decode(
                (string) \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
                $flags,
                $encoding
            );
        } while ($str_compare !== $str);

        return $str;
    }
2377
2378
    /**
     * Create an escaped html version of the string via "UTF8::htmlspecialchars()".
     *
     * Both quote styles are converted and invalid code unit sequences are substituted
     * (ENT_QUOTES | ENT_SUBSTITUTE).
     *
     * @param string $str
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2394
2395
    /**
     * Remove empty html-tags, i.e. an opening tag that is followed by its closing tag
     * with nothing but optional whitespace in between.
     *
     * e.g.: <tag></tag>
     *
     * @param string $str
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        // preg_replace() returns null on error, the cast turns that into an empty string
        return (string) $stripped;
    }
2412
2413
    /**
2414
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2415
     *
2416
     * @see http://php.net/manual/en/function.htmlentities.php
2417
     *
2418
     * @param string $str           <p>
2419
     *                              The input string.
2420
     *                              </p>
2421
     * @param int    $flags         [optional] <p>
2422
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2423
     *                              invalid code unit sequences and the used document type. The default is
2424
     *                              ENT_COMPAT | ENT_HTML401.
2425
     *                              <table>
2426
     *                              Available <i>flags</i> constants
2427
     *                              <tr valign="top">
2428
     *                              <td>Constant Name</td>
2429
     *                              <td>Description</td>
2430
     *                              </tr>
2431
     *                              <tr valign="top">
2432
     *                              <td><b>ENT_COMPAT</b></td>
2433
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2434
     *                              </tr>
2435
     *                              <tr valign="top">
2436
     *                              <td><b>ENT_QUOTES</b></td>
2437
     *                              <td>Will convert both double and single quotes.</td>
2438
     *                              </tr>
2439
     *                              <tr valign="top">
2440
     *                              <td><b>ENT_NOQUOTES</b></td>
2441
     *                              <td>Will leave both double and single quotes unconverted.</td>
2442
     *                              </tr>
2443
     *                              <tr valign="top">
2444
     *                              <td><b>ENT_IGNORE</b></td>
2445
     *                              <td>
2446
     *                              Silently discard invalid code unit sequences instead of returning
2447
     *                              an empty string. Using this flag is discouraged as it
2448
     *                              may have security implications.
2449
     *                              </td>
2450
     *                              </tr>
2451
     *                              <tr valign="top">
2452
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2453
     *                              <td>
2454
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2455
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2456
     *                              </td>
2457
     *                              </tr>
2458
     *                              <tr valign="top">
2459
     *                              <td><b>ENT_DISALLOWED</b></td>
2460
     *                              <td>
2461
     *                              Replace invalid code points for the given document type with a
2462
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2463
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2464
     *                              instance, to ensure the well-formedness of XML documents with
2465
     *                              embedded external content.
2466
     *                              </td>
2467
     *                              </tr>
2468
     *                              <tr valign="top">
2469
     *                              <td><b>ENT_HTML401</b></td>
2470
     *                              <td>
2471
     *                              Handle code as HTML 4.01.
2472
     *                              </td>
2473
     *                              </tr>
2474
     *                              <tr valign="top">
2475
     *                              <td><b>ENT_XML1</b></td>
2476
     *                              <td>
2477
     *                              Handle code as XML 1.
2478
     *                              </td>
2479
     *                              </tr>
2480
     *                              <tr valign="top">
2481
     *                              <td><b>ENT_XHTML</b></td>
2482
     *                              <td>
2483
     *                              Handle code as XHTML.
2484
     *                              </td>
2485
     *                              </tr>
2486
     *                              <tr valign="top">
2487
     *                              <td><b>ENT_HTML5</b></td>
2488
     *                              <td>
2489
     *                              Handle code as HTML 5.
2490
     *                              </td>
2491
     *                              </tr>
2492
     *                              </table>
2493
     *                              </p>
2494
     * @param string $encoding      [optional] <p>
2495
     *                              Like <b>htmlspecialchars</b>,
2496
     *                              <b>htmlentities</b> takes an optional third argument
2497
     *                              <i>encoding</i> which defines encoding used in
2498
     *                              conversion.
2499
     *                              Although this argument is technically optional, you are highly
2500
     *                              encouraged to specify the correct value for your code.
2501
     *                              </p>
2502
     * @param bool   $double_encode [optional] <p>
2503
     *                              When <i>double_encode</i> is turned off PHP will not
2504
     *                              encode existing html entities. The default is to convert everything.
2505
     *                              </p>
2506
     *
2507
     * @return string
2508
     *                <p>
2509
     *                The encoded string.
2510
     *                <br><br>
2511
     *                If the input <i>string</i> contains an invalid code unit
2512
     *                sequence within the given <i>encoding</i> an empty string
2513
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2514
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2515
     *                </p>
2516
     */
2517 9
    public static function htmlentities(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        // "UTF-8" and "CP850" are used as given, every other name is normalized first
        $useEncodingAsGiven = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$useEncodingAsGiven) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * The native function does not convert a backslash into its html entity,
         * so every backslash is replaced by its numeric entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
2537
2538
    /**
2539
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2540
     *
2541
     * INFO: Take a look at "UTF8::htmlentities()"
2542
     *
2543
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2544
     *
2545
     * @param string $str           <p>
2546
     *                              The string being converted.
2547
     *                              </p>
2548
     * @param int    $flags         [optional] <p>
2549
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2550
     *                              invalid code unit sequences and the used document type. The default is
2551
     *                              ENT_COMPAT | ENT_HTML401.
2552
     *                              <table>
2553
     *                              Available <i>flags</i> constants
2554
     *                              <tr valign="top">
2555
     *                              <td>Constant Name</td>
2556
     *                              <td>Description</td>
2557
     *                              </tr>
2558
     *                              <tr valign="top">
2559
     *                              <td><b>ENT_COMPAT</b></td>
2560
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2561
     *                              </tr>
2562
     *                              <tr valign="top">
2563
     *                              <td><b>ENT_QUOTES</b></td>
2564
     *                              <td>Will convert both double and single quotes.</td>
2565
     *                              </tr>
2566
     *                              <tr valign="top">
2567
     *                              <td><b>ENT_NOQUOTES</b></td>
2568
     *                              <td>Will leave both double and single quotes unconverted.</td>
2569
     *                              </tr>
2570
     *                              <tr valign="top">
2571
     *                              <td><b>ENT_IGNORE</b></td>
2572
     *                              <td>
2573
     *                              Silently discard invalid code unit sequences instead of returning
2574
     *                              an empty string. Using this flag is discouraged as it
2575
     *                              may have security implications.
2576
     *                              </td>
2577
     *                              </tr>
2578
     *                              <tr valign="top">
2579
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2580
     *                              <td>
2581
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2582
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2583
     *                              </td>
2584
     *                              </tr>
2585
     *                              <tr valign="top">
2586
     *                              <td><b>ENT_DISALLOWED</b></td>
2587
     *                              <td>
2588
     *                              Replace invalid code points for the given document type with a
2589
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2590
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2591
     *                              instance, to ensure the well-formedness of XML documents with
2592
     *                              embedded external content.
2593
     *                              </td>
2594
     *                              </tr>
2595
     *                              <tr valign="top">
2596
     *                              <td><b>ENT_HTML401</b></td>
2597
     *                              <td>
2598
     *                              Handle code as HTML 4.01.
2599
     *                              </td>
2600
     *                              </tr>
2601
     *                              <tr valign="top">
2602
     *                              <td><b>ENT_XML1</b></td>
2603
     *                              <td>
2604
     *                              Handle code as XML 1.
2605
     *                              </td>
2606
     *                              </tr>
2607
     *                              <tr valign="top">
2608
     *                              <td><b>ENT_XHTML</b></td>
2609
     *                              <td>
2610
     *                              Handle code as XHTML.
2611
     *                              </td>
2612
     *                              </tr>
2613
     *                              <tr valign="top">
2614
     *                              <td><b>ENT_HTML5</b></td>
2615
     *                              <td>
2616
     *                              Handle code as HTML 5.
2617
     *                              </td>
2618
     *                              </tr>
2619
     *                              </table>
2620
     *                              </p>
2621
     * @param string $encoding      [optional] <p>
2622
     *                              Defines encoding used in conversion.
2623
     *                              </p>
2624
     *                              <p>
2625
     *                              For the purposes of this function, the encodings
2626
     *                              ISO-8859-1, ISO-8859-15,
2627
     *                              UTF-8, cp866,
2628
     *                              cp1251, cp1252, and
2629
     *                              KOI8-R are effectively equivalent, provided the
2630
     *                              <i>string</i> itself is valid for the encoding, as
2631
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2632
     *                              the same positions in all of these encodings.
2633
     *                              </p>
2634
     * @param bool   $double_encode [optional] <p>
2635
     *                              When <i>double_encode</i> is turned off PHP will not
2636
     *                              encode existing html entities, the default is to convert everything.
2637
     *                              </p>
2638
     *
2639
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
2646
     */
2647 8
    public static function htmlspecialchars(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        // "UTF-8" and "CP850" are used as given, every other name is normalized first
        $useEncodingAsGiven = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$useEncodingAsGiven) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
2655
2656
    /**
     * Tells whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2666
2667
    /**
     * Alias for "UTF8::decimal_to_chr()".
     *
     * @see UTF8::decimal_to_chr()
     *
     * @param mixed $int <p>The value that is handed over to "UTF8::decimal_to_chr()".</p>
     *
     * @return string
     *                <p>The unmodified result of "UTF8::decimal_to_chr()".</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2680
2681
    /**
     * Converts an integer to its hexadecimal code point representation (e.g. "U+0041").
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>The prefix that is put in front of the hex digits.</p>
     *
     * @return string
     *                <p>The prefix followed by the lower-case hex digits, left-padded with "0" to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // values with fewer than four hex digits are left-padded, longer ones are kept as they are
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $digits;
    }
2699
2700
    /**
     * Tells whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $classExists = \class_exists('IntlChar');

        return $classExists;
    }
2710
2711
    /**
     * Tells whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2721
2722
    /**
     * Alias for "UTF8::is_ascii()".
     *
     * @see        UTF8::is_ascii()
     *
     * @param string $str <p>The string that is handed over to "UTF8::is_ascii()".</p>
     *
     * @return bool
     *              <p>The unmodified result of "UTF8::is_ascii()".</p>
     *
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $result = self::is_ascii($str);

        return $result;
    }
2737
2738
    /**
     * Alias for "UTF8::is_base64()".
     *
     * @see        UTF8::is_base64()
     *
     * @param string $str <p>The value that is handed over to "UTF8::is_base64()".</p>
     *
     * @return bool
     *              <p>The unmodified result of "UTF8::is_base64()".</p>
     *
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $result = self::is_base64($str);

        return $result;
    }
2753
2754
    /**
     * Alias for "UTF8::is_binary()".
     *
     * @see        UTF8::is_binary()
     *
     * @param mixed $str    <p>The value that is handed over to "UTF8::is_binary()".</p>
     * @param bool  $strict <p>Handed over to "UTF8::is_binary()" as its second argument.</p>
     *
     * @return bool
     *              <p>The unmodified result of "UTF8::is_binary()".</p>
     *
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $result = self::is_binary($str, $strict);

        return $result;
    }
2770
2771
    /**
     * Alias for "UTF8::is_bom()".
     *
     * @see        UTF8::is_bom()
     *
     * @param string $utf8_chr <p>The string that is handed over to "UTF8::is_bom()".</p>
     *
     * @return bool
     *              <p>The unmodified result of "UTF8::is_bom()".</p>
     *
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $result = self::is_bom($utf8_chr);

        return $result;
    }
2786
2787
    /**
     * Alias for "UTF8::is_html()".
     *
     * @see        UTF8::is_html()
     *
     * @param string $str <p>The string that is handed over to "UTF8::is_html()".</p>
     *
     * @return bool
     *              <p>The unmodified result of "UTF8::is_html()".</p>
     *
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $result = self::is_html($str);

        return $result;
    }
2802
2803
    /**
     * Alias for "UTF8::is_json()".
     *
     * @see        UTF8::is_json()
     *
     * @param string $str <p>The string that is handed over to "UTF8::is_json()".</p>
     *
     * @return bool
     *              <p>The unmodified result of "UTF8::is_json()".</p>
     *
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $result = self::is_json($str);

        return $result;
    }
2818
2819
    /**
     * Alias for "UTF8::is_utf16()".
     *
     * @see        UTF8::is_utf16()
     *
     * @param mixed $str <p>The value that is handed over to "UTF8::is_utf16()".</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
2837
2838
    /**
     * Alias for "UTF8::is_utf32()".
     *
     * @see        UTF8::is_utf32()
     *
     * @param mixed $str <p>The value that is handed over to "UTF8::is_utf32()".</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
2856
2857
    /**
     * Alias for "UTF8::is_utf8()".
     *
     * @see        UTF8::is_utf8()
     *
     * @param string $str    <p>The value that is handed over to "UTF8::is_utf8()".</p>
     * @param bool   $strict <p>Handed over to "UTF8::is_utf8()" as its second argument.</p>
     *
     * @return bool
     *              <p>The unmodified result of "UTF8::is_utf8()".</p>
     *
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $result = self::is_utf8($str, $strict);

        return $result;
    }
2873
2874
    /**
     * Checks whether the string consists of alphabetic chars only.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * pattern "^[[:alpha:]]*$".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $alphaOnlyPattern = '^[[:alpha:]]*$';

        return self::str_matches_pattern($str, $alphaOnlyPattern);
    }
2886
2887
    /**
     * Checks whether the string consists of alphabetic and numeric chars only.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * pattern "^[[:alnum:]]*$".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $alnumOnlyPattern = '^[[:alnum:]]*$';

        return self::str_matches_pattern($str, $alnumOnlyPattern);
    }
2899
2900
    /**
     * Checks if a string is 7 bit ASCII.
     *
     * Accepted bytes are 0x09, 0x0A, 0x0D, 0x10, 0x13 and the range 0x20-0x7E;
     * any other byte makes the check fail. An empty string is accepted.
     *
     * NOTE(review): 0x10 and 0x13 look like decimal 10 / 13 (LF / CR) written
     * as hex - confirm whether they are allowed on purpose.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        return $str === ''
               ||
               !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
2917
2918
    /**
     * Checks whether the input is a base64 encoded string.
     *
     * The input counts as base64 when it can be strictly decoded and encoding
     * the decoded data again yields exactly the same string.
     *
     * @param mixed|string $str                <p>The input string.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round-trip: only canonical base64 re-encodes to the very same string
        return \base64_encode($decoded) === $str;
    }
2943
2944
    /**
     * Check if the input is binary... (is look like a hack).
     *
     * The input is cast to string and treated as binary when one of these
     * checks hits (in this order):
     * - it consists of the chars "0" and "1" only
     * - "UTF8::get_file_type()" reports the type "binary"
     * - more than 25% of its bytes are NUL bytes
     * - only with $strict: ext-fileinfo reports the mime encoding "binary"
     *
     * @param mixed $input  <p>The value to check, an empty string is never binary.</p>
     * @param bool  $strict <p>Additionally ask ext-fileinfo for the mime encoding.</p>
     *
     * @throws \RuntimeException if $strict is used and ext-fileinfo is not available
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // a string of zeros and ones
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        $fileType = self::get_file_type($data);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        $byteLength = self::strlen_in_byte($data);
        if ($byteLength) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            // ratio of NUL bytes
            $nullBytes = self::substr_count_in_byte($data, "\x0", 0, $byteLength);
            if ($nullBytes !== false && ($nullBytes / $byteLength) > 0.25) {
                return true;
            }
        }

        if ($strict !== true) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $mimeEncoding === 'binary';
    }
3002
3003
    /**
     * Check if the file is binary.
     *
     * Up to 512 bytes are read from the start of the file and handed over to
     * "UTF8::is_binary()" in strict mode.
     *
     * @param string $file <p>The file to open.</p>
     *
     * @return bool
     *              <p><strong>false</strong> if nothing could be read, otherwise the result of "UTF8::is_binary()".</p>
     */
    public static function is_binary_file($file): bool
    {
        // stays empty when the file can not be opened
        $head = '';

        $handle = \fopen($file, 'rb');
        if (\is_resource($handle)) {
            $head = \fread($handle, 512);
            \fclose($handle);
        }

        return $head !== '' && self::is_binary($head, true);
    }
3027
3028
    /**
     * Checks whether the string consists of whitespace chars only.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * pattern "^[[:space:]]*$".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $whitespaceOnlyPattern = '^[[:space:]]*$';

        return self::str_matches_pattern($str, $whitespaceOnlyPattern);
    }
3040
3041
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is exactly one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise (also for non-string input)
     */
    public static function is_bom($str): bool
    {
        // The BOM strings are the keys of self::$BOM, so a key lookup replaces the
        // former by-reference loop, which left a dangling reference into the static
        // array on every early return.
        return \is_string($str) && isset(self::$BOM[$str]);
    }
3062
3063
    /**
     * Determine whether the value is considered to be empty.
     *
     * A value is considered empty if it is null or if it equals FALSE when
     * converted to bool (e.g. "", "0", 0, []).
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // for an existing variable empty() is the negated bool conversion
        return (bool) $str === false;
    }
3077
3078
    /**
     * Checks whether the string consists of hexadecimal chars only.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * pattern "^[[:xdigit:]]*$".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $hexOnlyPattern = '^[[:xdigit:]]*$';

        return self::str_matches_pattern($str, $hexOnlyPattern);
    }
3090
3091
    /**
     * Check if the string contains any html-tags <lall>.
     *
     * An opening, closing or self-closing tag (with or without attributes)
     * anywhere in the string is enough.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p><strong>true</strong> if at least one tag was found, <strong>false</strong> otherwise (also for an empty string).</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

        return \preg_match($tagPattern, $str) === 1;
    }
3111
3112
    /**
     * Try to check if "$str" is an json-string.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a decoded null only passes here if the input itself spells "null" (any letter case)
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($onlyArrayOrObjectResultsAreValid === true) {
            $isArrayOrObject = \is_object($decoded) || \is_array($decoded);
            if (!$isArrayOrObject) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3152
3153
    /**
     * Checks whether the string consists of lower case chars only.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * pattern "^[[:lower:]]*$".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $lowerOnlyPattern = '^[[:lower:]]*$';

        return self::str_matches_pattern($str, $lowerOnlyPattern);
    }
3162
3163
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with unserialize(); no objects are instantiated
     * while doing so, so it is safe to call with untrusted input.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // a serialized "false" can not be told apart from a failed unserialize() call
        if ($str === 'b:0;') {
            return true;
        }

        // "allowed_classes => false" keeps unserialize() from creating real objects
        // (they become __PHP_Incomplete_Class), which does not change the result of
        // this check; "@" hides the notice that is raised for non-serialized input.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3182
3183
    /**
     * Checks whether the string consists of upper case chars only.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * pattern "^[[:upper:]]*$".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $upperOnlyPattern = '^[[:upper:]]*$';

        return self::str_matches_pattern($str, $upperOnlyPattern);
    }
3196
3197
    /**
3198
     * Check if the string is UTF-16.
3199
     *
3200
     * @param mixed $str                   <p>The input string.</p>
3201
     * @param bool  $checkIfStringIsBinary
3202
     *
3203
     * @return false|int
3204
     *                   <strong>false</strong> if is't not UTF-16,<br>
3205
     *                   <strong>1</strong> for UTF-16LE,<br>
3206
     *                   <strong>2</strong> for UTF-16BE
3207
     */
3208 21
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;

        // Optional pre-check: input that is_binary() does not classify as binary
        // is reported as "not UTF-16" right away.
        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        // Populate the feature-detection table before reading it; this is the same
        // guard the sibling methods (is_utf8(), json_decode(), ...) apply.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Character table of the input: computed lazily, shared by both byte orders.
        $strChars = [];

        // One score per candidate byte order; the higher score wins.
        $scores = [];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $scores[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // Round trip (UTF-8 -> candidate -> UTF-8): the candidate is only
            // scored when the conversion is stable.
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // Only the keys are needed. Iterate by value: a function result is
            // not a valid reference target for "foreach (... as &$value)".
            // NOTE(review): in_array() searches the *values* of $strChars while
            // $test3char is a *key* of count_chars() — confirm this is intended.
            foreach (\array_keys(self::count_chars($test3, true)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        $maybeUTF16LE = $scores['UTF-16LE'];
        $maybeUTF16BE = $scores['UTF-16BE'];

        // Equal scores (including 0/0) mean no decision can be made.
        if ($maybeUTF16BE !== $maybeUTF16LE) {
            if ($maybeUTF16LE > $maybeUTF16BE) {
                return 1; // UTF-16LE
            }

            return 2; // UTF-16BE
        }

        return false;
    }
3274
3275
    /**
     * Check if the string is UTF-32.
     *
     * Each byte order (LE, BE) is scored by converting the BOM-stripped input to
     * UTF-8 and back; the byte order with the higher score is reported.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If true, return false right away when is_binary() rejects the input.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32 (or both byte orders score equally),<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        // Populate the feature-detection table before reading it; this is the same
        // guard the sibling methods (is_utf8(), json_decode(), ...) apply.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Character table of the input: computed lazily, shared by both byte orders.
        $strChars = [];

        // One score per candidate byte order; the higher score wins.
        $scores = [];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $scores[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // Round trip (UTF-8 -> candidate -> UTF-8): the candidate is only
            // scored when the conversion is stable.
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // Only the keys are needed. Iterate by value: a function result is
            // not a valid reference target for "foreach (... as &$value)".
            // NOTE(review): in_array() searches the *values* of $strChars while
            // $test3char is a *key* of count_chars() — confirm this is intended.
            foreach (\array_keys(self::count_chars($test3, true)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        $maybeUTF32LE = $scores['UTF-32LE'];
        $maybeUTF32BE = $scores['UTF-32BE'];

        // Equal scores (including 0/0) mean no decision can be made.
        if ($maybeUTF32BE !== $maybeUTF32LE) {
            if ($maybeUTF32LE > $maybeUTF32BE) {
                return 1; // UTF-32LE
            }

            return 2; // UTF-32BE
        }

        return false;
    }
3352
3353
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * An array is valid only if every element is valid; the empty string is valid.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Elements are only read, so iterate by value (no dangling reference).
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            // Binary-looking input that is detected as UTF-16 / UTF-32 is rejected.
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut is taken when pcre_utf8_support() is NOT true,
        // although it relies on the /u modifier — confirm the condition is intended.
        if (self::pcre_utf8_support() !== true) {

            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Lazily load the byte => ordinal lookup table.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = self::strlen_in_byte((string) $str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } else {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if ((0xC0 & $in) === 0x80) {
                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;
                    // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                    // Unicode code point to be output.
                    if (--$mState === 0) {
                        // Check for illegal sequences and code points.
                        //
                        // From Unicode 3.1, non-shortest form is illegal
                        if (
                            ($mBytes === 2 && $mUcs4 < 0x0080)
                            ||
                            ($mBytes === 3 && $mUcs4 < 0x0800)
                            ||
                            ($mBytes === 4 && $mUcs4 < 0x10000)
                            ||
                            ($mBytes > 4)
                            ||
                            // From Unicode 3.2, surrogate characters are illegal.
                            (($mUcs4 & 0xFFFFF800) === 0xD800)
                            ||
                            // Code points outside the Unicode range are illegal.
                            ($mUcs4 > 0x10FFFF)
                        ) {
                            return false;
                        }
                        // initialize UTF8 cache
                        $mState = 0;
                        $mUcs4 = 0;
                        $mBytes = 1;
                    }
                } else {
                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
                    // Incomplete multi-octet sequence.
                    return false;
                }
            }
        }

        return true;
    }
3512
3513
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string, after passing it through UTF8::filter().
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options. Currently only
     *                        <b>JSON_BIGINT_AS_STRING</b>
     *                        is supported (default is to cast large integers as floats)
     *                        </p>
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
    {
        // Filter the input first, exactly once, before any support checks.
        $filteredJson = self::filter($json);

        $supportAlreadyChecked = isset(self::$SUPPORT['already_checked_via_portable_utf8']);
        if ($supportAlreadyChecked === false) {
            self::checkForSupport();
        }

        $jsonIsMissing = self::$SUPPORT['json'] === false;
        if ($jsonIsMissing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filteredJson, $assoc, $depth, $options);
    }
3563
3564
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value, after passing it through UTF8::filter().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // Filter the value first, exactly once, before any support checks.
        $filteredValue = self::filter($value);

        $supportAlreadyChecked = isset(self::$SUPPORT['already_checked_via_portable_utf8']);
        if ($supportAlreadyChecked === false) {
            self::checkForSupport();
        }

        $jsonIsMissing = self::$SUPPORT['json'] === false;
        if ($jsonIsMissing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filteredValue, $options, $depth);
    }
3617
3618
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is "does the global function json_decode() exist".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $jsonDecodeExists = \function_exists('json_decode');

        return $jsonDecodeExists;
    }
3628
3629
    /**
     * Makes string's first char lowercase.
     *
     * The string is split into its first character and the remainder; only the
     * first character is passed to UTF8::strtolower().
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // Everything after the first character stays untouched.
        $tail = (string) self::substr($str, 1, null, $encoding, $cleanUtf8);

        // The first character is the only part that gets lowercased.
        $head = (string) self::substr($str, 0, 1, $encoding, $cleanUtf8);
        $loweredHead = self::strtolower($head, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $loweredHead . $tail;
    }
3654
3655
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowered;
    }
3677
3678
    /**
     * Lowercase for all words in the string.
     *
     * The string is split via UTF8::str_to_words(), every non-excluded word is
     * passed to UTF8::lcfirst(), and the parts are joined again without separator.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a truthiness test would also swallow
        // the perfectly valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // Write back through the index instead of a by-reference loop, so no
        // dangling reference is left behind.
        foreach ($words as $index => $word) {
            // Falsy parts ('' and "0") are left as they are.
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
3724
3725
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowered;
    }
3747
3748
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * Without an explicit character list, leading characters of the Unicode
     * categories Z (separators) and C (other/control) are removed.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars <p>Optional characters to be stripped</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        //
        // "0" is a valid character list (e.g. stripping leading zeros), so it must
        // not be mistaken for "no list given" by a plain truthiness test.
        if ($chars === \INF || ($chars !== '0' && !$chars)) {
            $pattern = '^[\pZ\pC]+';
        } else {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2.
            $pattern = "^[{$chars}]+";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3772
3773
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $subject = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepointList = self::codepoints($subject, false);
        if (\count($codepointList) < 1) {
            // nothing to compare
            return null;
        }

        return self::chr(\max($codepointList));
    }
3795
3796
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte lengths of the given chars, 0 if the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizeList = self::chr_size_list($str);

        // Guard: \max() must not be called with an empty list.
        if (\count($sizeList) === 0) {
            return 0;
        }

        return (int) \max($sizeList);
    }
3813
3814
    /**
     * Checks whether mbstring is available on the server.
     *
     * Side effect: when the extension is loaded, the mbstring internal encoding
     * is switched to "UTF-8".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        if (\extension_loaded('mbstring') !== true) {
            return false;
        }

        \mb_internal_encoding('UTF-8');

        return true;
    }
3829
3830
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point than others, returns null on failure or empty input
     */
    public static function min($arg)
    {
        $subject = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepointList = self::codepoints($subject, false);
        if (\count($codepointList) < 1) {
            // nothing to compare
            return null;
        }

        return self::chr(\min($codepointList));
    }
3852
3853
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * Both arguments are forwarded unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
3869
3870
    /**
3871
     * Normalize the encoding-"name" input.
3872
     *
3873
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3874
     * @param mixed $fallback <p>e.g.: UTF-8</p>
3875
     *
3876
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
3877
     */
3878
    public static function normalize_encoding($encoding, $fallback = '')
3879
    {
3880 354
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];
3881
3882
        // init
3883 354
        $encoding = (string) $encoding;
3884
3885
        if (
3886 354
            !$encoding
3887
            ||
3888 50
            $encoding === '1' // only a fallback, for non "strict_types" usage ...
3889
            ||
3890 354
            $encoding === '0' // only a fallback, for non "strict_types" usage ...
3891
        ) {
3892 309
            return $fallback;
3893
        }
3894
3895
        if (
3896 49
            $encoding === 'UTF-8'
3897
            ||
3898 49
            $encoding === 'UTF8'
3899
        ) {
3900 22
            return 'UTF-8';
3901
        }
3902
3903
        if (
3904 42
            $encoding === '8BIT'
3905
            ||
3906 42
            $encoding === 'BINARY'
3907
        ) {
3908
            return 'CP850';
3909
        }
3910
3911
        if (
3912 42
            $encoding === 'HTML'
3913
            ||
3914 42
            $encoding === 'HTML-ENTITIES'
3915
        ) {
3916 2
            return 'HTML-ENTITIES';
3917
        }
3918
3919 42
        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
3920 40
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
3921
        }
3922
3923 6
        if (self::$ENCODINGS === null) {
3924 1
            self::$ENCODINGS = self::getData('encodings');
3925
        }
3926
3927 6
        if (\in_array($encoding, self::$ENCODINGS, true)) {
3928 4
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;
3929
3930 4
            return $encoding;
3931
        }
3932
3933 5
        $encodingOrig = $encoding;
3934 5
        $encoding = \strtoupper($encoding);
3935 5
        $encodingUpperHelper = (string) \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
3936
3937
        $equivalences = [
3938 5
            'ISO8859'     => 'ISO-8859-1',
3939
            'ISO88591'    => 'ISO-8859-1',
3940
            'ISO'         => 'ISO-8859-1',
3941
            'LATIN'       => 'ISO-8859-1',
3942
            'LATIN1'      => 'ISO-8859-1', // Western European
3943
            'ISO88592'    => 'ISO-8859-2',
3944
            'LATIN2'      => 'ISO-8859-2', // Central European
3945
            'ISO88593'    => 'ISO-8859-3',
3946
            'LATIN3'      => 'ISO-8859-3', // Southern European
3947
            'ISO88594'    => 'ISO-8859-4',
3948
            'LATIN4'      => 'ISO-8859-4', // Northern European
3949
            'ISO88595'    => 'ISO-8859-5',
3950
            'ISO88596'    => 'ISO-8859-6', // Greek
3951
            'ISO88597'    => 'ISO-8859-7',
3952
            'ISO88598'    => 'ISO-8859-8', // Hebrew
3953
            'ISO88599'    => 'ISO-8859-9',
3954
            'LATIN5'      => 'ISO-8859-9', // Turkish
3955
            'ISO885911'   => 'ISO-8859-11',
3956
            'TIS620'      => 'ISO-8859-11', // Thai
3957
            'ISO885910'   => 'ISO-8859-10',
3958
            'LATIN6'      => 'ISO-8859-10', // Nordic
3959
            'ISO885913'   => 'ISO-8859-13',
3960
            'LATIN7'      => 'ISO-8859-13', // Baltic
3961
            'ISO885914'   => 'ISO-8859-14',
3962
            'LATIN8'      => 'ISO-8859-14', // Celtic
3963
            'ISO885915'   => 'ISO-8859-15',
3964
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
3965
            'ISO885916'   => 'ISO-8859-16',
3966
            'LATIN10'     => 'ISO-8859-16', // Southeast European
3967
            'CP1250'      => 'WINDOWS-1250',
3968
            'WIN1250'     => 'WINDOWS-1250',
3969
            'WINDOWS1250' => 'WINDOWS-1250',
3970
            'CP1251'      => 'WINDOWS-1251',
3971
            'WIN1251'     => 'WINDOWS-1251',
3972
            'WINDOWS1251' => 'WINDOWS-1251',
3973
            'CP1252'      => 'WINDOWS-1252',
3974
            'WIN1252'     => 'WINDOWS-1252',
3975
            'WINDOWS1252' => 'WINDOWS-1252',
3976
            'CP1253'      => 'WINDOWS-1253',
3977
            'WIN1253'     => 'WINDOWS-1253',
3978
            'WINDOWS1253' => 'WINDOWS-1253',
3979
            'CP1254'      => 'WINDOWS-1254',
3980
            'WIN1254'     => 'WINDOWS-1254',
3981
            'WINDOWS1254' => 'WINDOWS-1254',
3982
            'CP1255'      => 'WINDOWS-1255',
3983
            'WIN1255'     => 'WINDOWS-1255',
3984
            'WINDOWS1255' => 'WINDOWS-1255',
3985
            'CP1256'      => 'WINDOWS-1256',
3986
            'WIN1256'     => 'WINDOWS-1256',
3987
            'WINDOWS1256' => 'WINDOWS-1256',
3988
            'CP1257'      => 'WINDOWS-1257',
3989
            'WIN1257'     => 'WINDOWS-1257',
3990
            'WINDOWS1257' => 'WINDOWS-1257',
3991
            'CP1258'      => 'WINDOWS-1258',
3992
            'WIN1258'     => 'WINDOWS-1258',
3993
            'WINDOWS1258' => 'WINDOWS-1258',
3994
            'UTF16'       => 'UTF-16',
3995
            'UTF32'       => 'UTF-32',
3996
            'UTF8'        => 'UTF-8',
3997
            'UTF'         => 'UTF-8',
3998
            'UTF7'        => 'UTF-7',
3999
            '8BIT'        => 'CP850',
4000
            'BINARY'      => 'CP850',
4001
        ];
4002
4003 5
        if (!empty($equivalences[$encodingUpperHelper])) {
4004 4
            $encoding = $equivalences[$encodingUpperHelper];
4005
        }
4006
4007 5
        $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;
4008
4009 5
        return $encoding;
4010
    }
4011
4012
    /**
     * Convert Windows ("\r\n") and lone carriage-return ("\r") line endings into unix-style "\n".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input with every "\r\n" and "\r" replaced by "\n"
     */
    public static function normalize_line_ending(string $str): string
    {
        // order matters: "\r\n" has to be handled before the single "\r"
        $foreignLineEndings = ["\r\n", "\r"];
        $normalized = \str_replace($foreignLineEndings, "\n", $str);

        return (string) $normalized;
    }
4023
4024
    /**
     * Replace the characters listed in the "utf8_msword" data table (MS Word special characters)
     * with the replacements stored in that table.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        // search / replacement lists are derived from the table on first use and reused afterwards
        static $searchList = null;
        static $replaceList = null;

        if ($str === '') {
            return '';
        }

        if ($searchList === null) {
            // lazy-load the mapping table itself
            if (self::$UTF8_MSWORD === null) {
                self::$UTF8_MSWORD = self::getData('utf8_msword');
            }

            $searchList = \array_keys(self::$UTF8_MSWORD);
            $replaceList = \array_values(self::$UTF8_MSWORD);
        }

        return \str_replace($searchList, $replaceList, $str);
    }
4051
4052
    /**
     * Replace every character of the internal whitespace table with a plain space (" ").
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        // one prepared search list per value of $keepNonBreakingSpace (index 0 / 1)
        static $whitespaceLists = [];
        static $bidiControls = null;

        if ($str === '') {
            return '';
        }

        $listKey = (int) $keepNonBreakingSpace;

        if (!isset($whitespaceLists[$listKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $whitespaceLists[$listKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($bidiControls === null) {
                $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // the bidi control chars are removed completely, not replaced by a space
            $str = \str_replace($bidiControls, '', $str);
        }

        return \str_replace($whitespaceLists[$listKey], ' ', $str);
    }
4093
4094
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        // results of the slow paths below, keyed by "<input><encoding>"
        static $memo = [];

        $character = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the key is built from the untouched input, before any re-encoding
        $memoKey = $character . $encoding;
        if (isset($memo[$memoKey]) === true) {
            return $memo[$memoKey];
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // fast path: pre-computed lookup table
        if (isset(self::$ORD[$character])) {
            return self::$ORD[$character];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $character = self::encode($encoding, $character);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($character);
            if ($intlCode) {
                return $memo[$memoKey] = $intlCode;
            }
        }

        // manual decoding of the first (up to) four bytes; "unpack()" yields a 1-based array
        $bytes = \unpack('C*', (string) self::substr($character, 0, 4, 'CP850'));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // 4-byte sequence
            $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // 3-byte sequence
            $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // 2-byte sequence
            $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte (or nothing to decode at all)
            $codePoint = $lead;
        }

        return $memo[$memoKey] = (int) $codePoint;
    }
4171
4172
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()" this method never (re-)places variables in the current scope,
     *          the result is always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // without "mbstring" the native parser is used
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return !empty($result);
        }

        $parsed = \mb_parse_str($str, $result);
        if ($parsed === false) {
            return false;
        }

        return !empty($result);
    }
4208
4209
    /**
     * Checks if the "u" pattern modifier (Unicode support in PCRE) is available.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // an empty "u"-pattern matches the empty string exactly once when the modifier is supported;
        // the warning of an unsupported modifier is silenced on purpose
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matched = @\preg_match('//u', '');

        return $matched === 1;
    }
4221
4222
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is not available
     *
     * @return string[]
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // decimal digits win over hexadecimal digits, everything else is treated as a character
        $toCodePoint = static function ($boundary): int {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit((string) $boundary)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($boundary)) {
                return (int) self::hex_to_int($boundary);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        $characters = [];
        foreach (\range($start, $end) as $codePoint) {
            $characters[] = (string) self::chr($codePoint);
        }

        return $characters;
    }
4277
4278
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // "%uXXXX" escapes are rewritten into hexadecimal HTML entities ("&#xXXXX;")
        $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscape, $str)) {
            $str = (string) \preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
        }

        if ($multi_decode !== true) {
            return $str;
        }

        $entityFlags = \ENT_QUOTES | \ENT_HTML5;

        // repeat the decoding pipeline until a pass no longer changes the string
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $withoutEntities = self::html_entity_decode(self::to_utf8($str), $entityFlags);
            $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
        } while ($previous !== $str);

        return $str;
    }
4330
4331
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in $delimiter and always compiled with the "u" modifier,
     * followed by the given $options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
    {
        // the option string "msr" is mapped to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty (falsy) delimiter
        $wrap = $delimiter ?: '/';

        $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4359
4360
    /**
     * Alias of "UTF8::remove_bom()", it only forwards the call.
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4375
4376
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the internal BOM table is tested once, in table order, against the start
     * of the (already shortened) string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = self::strlen_in_byte($str);

        foreach (self::$BOM as $bom => $bomLength) {
            // only a BOM at byte offset 0 counts
            if (self::strpos_in_byte($str, $bom, 0) !== 0) {
                continue;
            }

            $stripped = self::substr_in_byte($str, $bomLength, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $str = (string) $stripped;
            $remainingBytes -= (int) $bomLength;
        }

        return $str;
    }
4405
4406
    /**
     * Collapse directly repeated occurrences of a string into a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String (or list of strings) whose consecutive repetitions are collapsed.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $needles = \is_string($what) === true ? [$what] : $what;

        // anything that is neither a string nor an array leaves the input untouched
        if (\is_array($needles) !== true) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($needles as $needle) {
            // one or more back-to-back copies of the needle ...
            $runOfNeedles = '/(' . \preg_quote($needle, '/') . ')+/';

            // ... are reduced to exactly one copy
            $str = (string) \preg_replace($runOfNeedles, $needle, $str);
        }

        return $str;
    }
4429
4430
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                              Default: '' (strip every tag)</p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4444
4445
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // NOTE: the former pattern started with a stray "/" ("#/\r\n|..."), so a plain "\r\n" was
        //       matched as two separate breaks and a literal "/" in front of a CRLF was swallowed.
        //       Flags: i = case-insensitive "<BR>", s = "." spans newlines, U = ungreedy.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4457
4458
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00" - "%1F") of the
     *                            control characters. Default: true</p>
     * @param string $replacement [optional] <p>The string every match is replaced with. Default: ''</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        if ($url_encoded) {
            // hex digits of a percent-escape are case-insensitive ("%0b" === "%0B"),
            // hence the "i" modifier - otherwise upper-case escapes would slip through
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // removing one match can create a new one (e.g. "%0%000" => "%00"), so repeat until stable
        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4491
4492
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // nothing to do when the string does not start with the prefix
        if (!self::str_starts_with($str, $substring)) {
            return $str;
        }

        // skip as many characters as the prefix is long and keep the rest
        $prefixLength = (int) self::strlen($substring, $encoding);

        return (string) self::substr($str, $prefixLength, null, $encoding);
    }
4514
4515
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (self::str_ends_with($str, $substring)) {
            return (string) self::substr(
                $str,
                0,
                (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
                // both lengths above are measured in $encoding, so the cut has to use the same
                // encoding (it was omitted before and "substr()" fell back to its default)
                $encoding
            );
        }

        return $str;
    }
4536
4537
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Delegates to "UTF8::str_replace()" (case-sensitive) or "UTF8::str_ireplace()" (case-insensitive).
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4555
4556
    /**
     * Replaces all occurrences of every $search element in $str by $replacement.
     *
     * Delegates to "UTF8::str_replace()" (case-sensitive) or "UTF8::str_ireplace()" (case-insensitive).
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4574
4575
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    [optional] <p>The replacement character. Default: '' (remove)</p>
     * @param bool   $processInvalidUtf8 [optional] <p>Also convert invalid UTF-8 byte sequences. Default: true</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" accepts only "none" / "long" / "entity" or an integer code
            // point - never an arbitrary string. So invalid sequences are either dropped ("none") or
            // turned into U+FFFD, which the final "str_replace()" below maps to $replacementChar.
            $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

            // always fallback via symfony polyfill
            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);

            try {
                $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // the substitute character is a global setting: restore it in every case
                \mb_substitute_character($save);
            }

            // the polyfill maybe return false
            /** @psalm-suppress RedundantCondition */
            $str = \is_string($strTmp) ? $strTmp : '';
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacementChar,
                $replacementChar,
            ],
            $str
        );
    }
4629
4630
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string $str   <p>The string to be trimmed.</p>
     * @param mixed  $chars [optional] <p>Characters to be stripped. Without it, separator (\pZ) and
     *                      "other" (\pC, e.g. control) characters are stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        //
        // A plain "!$chars" would also be true for the string "0", which is a legitimate
        // character to strip (e.g. rtrim('100', '0')), so it is excluded from the "empty" check.
        if ($chars === \INF || ($chars !== '0' && !$chars)) {
            $pattern = '[\pZ\pC]+$';
        } else {
            // explicit concatenation instead of the "${var}" interpolation (deprecated as of PHP 8.2)
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4654
4655
    /**
     * WARNING: Prints (echo) the detected native UTF-8 support (libs) as html, e.g. for debugging.
     *
     * One "key - value" line per entry of the internal support table, wrapped in a "<pre>" block.
     */
    public static function showSupport()
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        echo '<pre>';

        foreach (self::$SUPPORT as $feature => $state) {
            $dump = \print_r($state, true);

            echo $feature . ' - ' . $dump . "\n<br>";
        }

        echo '</pre>';
    }
4671
4672
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // ASCII input is handed back untouched, but only on request
        if ($keepAsciiChars === true) {
            if (self::is_ascii($char) === true) {
                return $char;
            }
        }

        // the two most common encodings skip the normalization
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4701
4702
    /**
     * Replace every run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        $spacesPerTab = \str_repeat(' ', $tabLength);

        return \str_replace($spacesPerTab, "\t", $str);
    }
4712
4713
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is given, every element is split on its own (with the same
     * $length and $cleanUtf8 settings) and the array of results is returned.
     *
     * @param int|int[]|string|string[] $str       <p>The string to split into array.</p>
     * @param int                       $length    [optional] <p>Max character length of each array element.</p>
     * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; an empty array for
     *               an empty input or a non-positive $length.</p>
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // Forward $cleanUtf8 so nested values are cleaned exactly like a
            // top-level string; assigning by key avoids a dangling reference.
            foreach ($str as $k => $v) {
                $str[$k] = self::split($v, $length, $cleanUtf8);
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        // init
        $ret = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['pcre_utf8'] === true) {
            // "/./us" matches one code point at a time, line breaks included.
            \preg_match_all('/./us', $str, $retArray);
            if (isset($retArray[0])) {
                $ret = $retArray[0];
            }
            unset($retArray);
        } else {
            // fallback: walk the bytes and group them by their UTF-8 lead byte

            $len = self::strlen_in_byte($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single-byte character
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two-byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // Regroup the single characters into strings of up to $length characters.
            $ret = \array_chunk($ret, $length);

            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
4836
4837
    /**
     * Builds the camelCase form of a string: surrounding whitespace is trimmed,
     * the first character is lower-cased, leading dashes/underscores are dropped,
     * runs of dashes, underscores and whitespace are removed while the character
     * after them is upper-cased, and characters following digits are upper-cased.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // One shared upper-casing helper for both replacement passes.
        $toUpper = static function (string $chars) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength): string {
            return self::strtoupper($chars, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        };

        $camel = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        $camel = (string) \preg_replace('/^[-_]+/', '', $camel);

        // Pass 1: drop separator runs and upper-case the character right after them (if any).
        $camel = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return isset($match[1]) ? $toUpper($match[1]) : '';
            },
            $camel
        );

        // Pass 2: upper-case a digit run together with the character that follows it.
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return $toUpper($match[0]);
            },
            $camel
        );
    }
4885
4886
    /**
     * Capitalizes a name: whitespace is collapsed first, then the name helper is
     * applied to the space-separated parts and afterwards to the dash-separated
     * parts (the helper decides which words must not be capitalized).
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $name = self::collapse_whitespace($str);

        // Order matters: spaces are handled before dashes.
        foreach ([' ', '-'] as $partDelimiter) {
            $name = self::str_capitalize_name_helper($name, $partDelimiter);
        }

        return $name;
    }
4902
4903
    /**
     * Tells whether $haystack contains $needle. The search is case-sensitive
     * unless $caseSensitive is false. An empty haystack or needle never matches.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // BC shim: a non-boolean third argument is treated as the encoding.
        /** @psalm-suppress RedundantConditionGivenDocblockType */
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        $position = $caseSensitive
            ? self::strpos($haystack, $needle, 0, $encoding)
            : self::stripos($haystack, $needle, 0, $encoding);

        return $position !== false;
    }
4933
4934
    /**
     * Tells whether $haystack contains every one of $needles. The search is
     * case-sensitive unless $caseSensitive is false. An empty haystack or an
     * empty list of needles yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains all $needles
     */
    public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // BC shim: a non-boolean third argument is treated as the encoding.
        /** @psalm-suppress RedundantConditionGivenDocblockType */
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        foreach ($needles as $candidate) {
            // The first missing needle settles the answer.
            if (self::str_contains($haystack, $candidate, $caseSensitive, $encoding) === false) {
                return false;
            }
        }

        return true;
    }
4970
4971
    /**
     * Tells whether $haystack contains at least one of $needles. The search is
     * case-sensitive unless $caseSensitive is false. An empty list of needles
     * yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool
     *              Whether or not $haystack contains any of the $needles
     */
    public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($needles === []) {
            return false;
        }

        foreach ($needles as $candidate) {
            // The first hit settles the answer.
            if (self::str_contains($haystack, $candidate, $caseSensitive, $encoding) === true) {
                return true;
            }
        }

        return false;
    }
4998
4999
    /**
     * Returns a lowercase, trimmed and dash-separated version of the string.
     * This is str_delimit() with "-" as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5013
5014
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * A separator is placed before each A-Z capital that does not start a word,
     * and every run of dashes, underscores and whitespace becomes one delimiter.
     * The delimiter is inserted after lower-casing, so it keeps its own case.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Mark inner capitals with a temporary dash; it is swapped for the delimiter below.
        $marked = (string) \preg_replace('/\B([A-Z])/u', '-\1', self::trim($str));

        $lowered = self::strtolower($marked, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $lowered);
    }
5047
5048
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary data (UTF-16 / UTF-32 only), ASCII,
     * UTF-8, "mb_detect_encoding()" and finally an "iconv()" round-trip over the
     * known encodings.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true) === true) {
            $utf16 = self::is_utf16($input, false);
            if ($utf16 === 1) {
                return 'UTF-16LE';
            }
            if ($utf16 === 2) {
                return 'UTF-16BE';
            }

            $utf32 = self::is_utf32($input, false);
            if ($utf32 === 1) {
                return 'UTF-32LE';
            }
            if ($utf32 === 2) {
                return 'UTF-32BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (self::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) valid UTF-8
        //

        if (self::is_utf8($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) ask "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) last resort: "iconv()" round-trip
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // An encoding is accepted when converting to itself leaves the bytes untouched.
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($roundTrip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5173
5174
    /**
     * Check if the string ends with the given substring (byte-wise, case-sensitive).
     * An empty haystack or needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Compare the needle with the same number of trailing bytes of the haystack.
        $tail = \substr($haystack, -\strlen($needle));

        return $tail === $needle;
    }
5190
5191
    /**
     * Tells whether the string ends with at least one of $substrings.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $suffix) {
            if (self::str_ends_with($str, $suffix) === true) {
                return true;
            }
        }

        return false;
    }
5215
5216
    /**
     * Guarantees that the string begins with $substring by prepending it
     * whenever str_starts_with() reports that it is missing.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        if (self::str_starts_with($str, $substring)) {
            return $str;
        }

        return $substring . $str;
    }
5233
5234
    /**
     * Guarantees that the string ends with $substring by appending it
     * whenever str_ends_with() reports that it is missing.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        if (self::str_ends_with($str, $substring)) {
            return $str;
        }

        return $str . $substring;
    }
5250
5251
    /**
     * Makes an identifier human readable: every "_id" is removed, remaining
     * underscores become spaces, the result is trimmed and its first character
     * is upper-cased.
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // "_id" is listed first so it is stripped before single underscores turn into spaces.
        $search = ['_id', '_'];
        $replacements = ['', ' '];

        $readable = self::str_replace($search, $replacements, $str);

        return self::ucfirst(self::trim($readable));
    }
5275
5276
    /**
     * Check if the string ends with the given substring, case insensitive.
     * An empty haystack or needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // The tail is cut by the needle's byte length, then compared ignoring case.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5292
5293
    /**
     * Tells whether the string ends with at least one of $substrings.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $suffix) {
            if (self::str_iends_with($str, $suffix) === true) {
                return true;
            }
        }

        return false;
    }
5317
5318
    /**
     * Case-insensitive lookup of the first occurrence of $needle in the string;
     * a thin wrapper around stripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        $firstIndex = self::stripos($str, $needle, $offset, $encoding);

        return $firstIndex;
    }
5340
5341
    /**
     * Case-insensitive lookup of the last occurrence of $needle in the string;
     * a thin wrapper around strripos(). Offsets may be negative to count from
     * the last character in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        $lastIndex = self::strripos($str, $needle, $offset, $encoding);

        return $lastIndex;
    }
5364
5365
    /**
     * Case-sensitive lookup of the first occurrence of $needle in the string;
     * a thin wrapper around strpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        $firstIndex = self::strpos($str, $needle, $offset, $encoding);

        return $firstIndex;
    }
5387
5388
    /**
     * Case-sensitive lookup of the last occurrence of $needle in the string;
     * a thin wrapper around strrpos(). Offsets may be negative to count from
     * the last character in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        $lastIndex = self::strrpos($str, $needle, $offset, $encoding);

        return $lastIndex;
    }
5411
5412
    /**
     * Inserts $substring into the string at the $index provided. The string is
     * returned unchanged when $index lies beyond its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
    {
        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        // Cut the string at $index and glue the new part in between.
        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5434
5435
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search term is turned into a quoted, case-insensitive UTF-8 regex.
     * Replacements are inserted literally: "$" and "\" have no special meaning.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search term never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement string, or an array of replacements
     *                       paired with the search terms by position.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // An empty needle gets a pattern that can never match.
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // preg_replace() would read "$n" / "\n" in the replacement as
        // backreferences; escape them so the text is inserted verbatim,
        // like str_ireplace() does.
        $escape = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5479
5480
    /**
     * Replaces $search at the beginning of the string with $replacement,
     * ignoring case (byte-wise via stripos()). An empty $search appends the
     * replacement to the string.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Nothing to look for: the replacement is simply attached to the string.
        if ($search === '') {
            return $str . $replacement;
        }

        $startsWithSearch = \stripos($str, $search) === 0;
        if ($startsWithSearch === false) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5511
5512
    /**
     * Replaces $search at the ending of the string with $replacement,
     * ignoring case (byte-wise via stripos()). An empty $search appends the
     * replacement to the string; a $search longer than the string leaves the
     * string unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);
        $tailOffset = \strlen($str) - $searchLength;

        // A longer search can never be the ending; bailing out here also keeps
        // stripos() from getting an out-of-range negative offset (ValueError on PHP 8).
        if ($tailOffset < 0) {
            return $str;
        }

        // Starting at $tailOffset leaves exactly strlen($search) bytes to match against.
        if (\stripos($str, $search, $tailOffset) !== false) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
5543
5544
    /**
     * Check if the string starts with the given substring, case insensitive.
     * An empty haystack or needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // A match at index 0 means the haystack begins with the needle.
        $firstIndex = self::stripos($haystack, $needle);

        return $firstIndex === 0;
    }
5560
5561
    /**
     * Tells whether the string begins with at least one of $substrings.
     * An empty string or an empty list of substrings yields false.
     *
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $prefix) {
            if (self::str_istarts_with($str, $prefix) === true) {
                return true;
            }
        }

        return false;
    }
5589
5590
    /**
     * Gets the substring after the first occurrence of a separator; the
     * separator is located case-insensitively. Returns an empty string when
     * the input or the separator is empty, or when the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if (
            $separator === ''
            ||
            $str === ''
        ) {
            return '';
        }

        // Locate the separator with the caller's encoding so the offset is
        // counted in the same units as the strlen() / substr() calls below.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5621
5622
    /**
     * Return everything that follows the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The separator to look for.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The text after the separator, or an empty string when
     *                either argument is empty or the separator is not found.</p>
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): $encoding is not forwarded to the index lookup — confirm that is intended.
        $position = self::str_iindex_last($str, $separator);
        if ($position === false) {
            return '';
        }

        // Skip past the separator itself.
        $separatorLength = (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $position + $separatorLength, null, $encoding);
    }
5653
5654
    /**
     * Return everything that precedes the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The separator to look for.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The text before the separator, or an empty string when
     *                either argument is empty or the separator is not found.</p>
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): $encoding is not forwarded to the index lookup — confirm that is intended.
        $position = self::str_iindex_first($str, $separator);

        return $position === false
            ? ''
            : (string) self::substr($str, 0, $position, $encoding);
    }
5680
5681
    /**
     * Return everything that precedes the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The separator to look for.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The text before the separator, or an empty string when
     *                either argument is empty or the separator is not found.</p>
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): $encoding is not forwarded to the index lookup — confirm that is intended.
        $position = self::str_iindex_last($str, $separator);

        return $position === false
            ? ''
            : (string) self::substr($str, 0, $position, $encoding);
    }
5707
5708
    /**
     * Return the part of the string after (or, with "$beforeNeedle", before)
     * the first occurrence of "$needle".
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The result of self::stristr(), or an empty string when
     *                either argument is empty or the lookup returns false.</p>
     */
    public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        // Normalise the "not found" marker to an empty string.
        return $found === false ? '' : $found;
    }
5740
5741
    /**
     * Return the part of the string after (or, with "$beforeNeedle", before)
     * the last occurrence of "$needle".
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>The result of self::strrichr(), or an empty string when
     *                either argument is empty or the lookup returns false.</p>
     */
    public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        // Normalise the "not found" marker to an empty string.
        return $found === false ? '' : $found;
    }
5768
5769
    /**
     * Return the trailing $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The trailing characters; an empty string when $n is zero or negative.</p>
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n > 0) {
            // A negative start offset counts from the end of the string.
            return (string) self::substr($str, -$n, null, $encoding);
        }

        return '';
    }
5786
5787
    /**
     * Limit the number of characters in a string.
     *
     * When the string is longer than $length it is cut so that the kept part
     * plus $strAddOn is $length characters long.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>An empty string for empty input or a non-positive $length,
     *                the unchanged input when it already fits, otherwise the
     *                shortened string followed by $strAddOn.</p>
     */
    public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the input string.
        $keep = $length - (int) self::strlen($strAddOn, $encoding);

        // A negative length would make substr() drop characters from the END instead of
        // limiting the result, so nothing of the input is kept when the add-on alone
        // already fills (or exceeds) the limit.
        if ($keep <= 0) {
            return $strAddOn;
        }

        return (string) self::substr($str, 0, $keep, $encoding) . $strAddOn;
    }
5813
5814
    /**
     * Limit the number of characters in a string, cutting at a word boundary
     * (a space) where possible.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string <p>An empty string for empty input or a non-positive $length,
     *                the unchanged input when it already fits, otherwise the
     *                shortened string followed by $strAddOn.</p>
     */
    public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The cut falls exactly on a space: drop that space and append the add-on.
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '' . $strAddOn;
        }

        // Remove the last (possibly partial) word: keep everything before the final space.
        $lastSpace = \strrpos($truncated, ' ');
        $withoutLastWord = $lastSpace === false ? '' : (string) \substr($truncated, 0, $lastSpace);

        if ($withoutLastWord === '') {
            // No earlier word boundary available: fall back to a hard cut.
            return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $strAddOn;
        }

        return $withoutLastWord . $strAddOn;
    }
5857
5858
    /**
     * Return the longest prefix shared by the string and $otherStr.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The common leading characters, possibly an empty string.</p>
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // Only the length of the shorter string needs to be examined.
        $limit = \min((int) self::strlen($str, $encoding), (int) self::strlen($otherStr, $encoding));

        $prefix = '';
        $i = 0;
        while ($i < $limit) {
            $char = self::substr($str, $i, 1, $encoding);

            // Stop at the first position where the characters differ (or cannot be read).
            if ($char === false || $char !== self::substr($otherStr, $i, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$i;
        }

        return $prefix;
    }
5888
5889
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The longest common substring, or an empty string when
     *                either input has length 0 or nothing is shared.</p>
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem
        $strLength = (int) self::strlen($str, $encoding);
        $otherLength = (int) self::strlen($otherStr, $encoding);

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Extract the characters of $otherStr once instead of once per table cell.
        $otherChars = [];
        for ($j = 1; $j <= $otherLength; ++$j) {
            $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // index (1-based, in $str) just past the end of that match
        $table = \array_fill(
            0,
            $strLength + 1,
            \array_fill(0, $otherLength + 1, 0)
        );

        for ($i = 1; $i <= $strLength; ++$i) {
            // The character of $str is the same for the whole row.
            $strChar = self::substr($str, $i - 1, 1, $encoding);

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j]) {
                    // Extend the common run that ended at the previous character pair.
                    $table[$i][$j] = $table[$i - 1][$j - 1] + 1;
                    // Strict ">" keeps the first of several equally long matches.
                    if ($table[$i][$j] > $len) {
                        $len = $table[$i][$j];
                        $end = $i;
                    }
                } else {
                    $table[$i][$j] = 0;
                }
            }
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
5938
5939
    /**
     * Return the longest suffix shared by the string and $otherStr.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The common trailing characters, possibly an empty string.</p>
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // Only the length of the shorter string needs to be examined.
        $limit = \min((int) self::strlen($str, $encoding), (int) self::strlen($otherStr, $encoding));

        $suffix = '';
        $i = 1;
        while ($i <= $limit) {
            // Negative offsets walk both strings backwards from their ends.
            $char = self::substr($str, -$i, 1, $encoding);

            // Stop at the first position where the characters differ (or cannot be read).
            if ($char === false || $char !== self::substr($otherStr, -$i, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$i;
        }

        return $suffix;
    }
5969
5970
    /**
     * Test whether $str matches the supplied regular expression.
     *
     * The pattern is placed, unescaped, between "/" delimiters and evaluated
     * with the "u" modifier.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
     *
     * @return bool <p>true when the pattern matches; false when it does not match
     *              or when preg_match() reports a failure.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
5982
5983
    /**
     * Tell whether a character exists at the given index. Negative offsets
     * count from the last character of the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        if ($offset < 0) {
            // -1 is the last character, so -$charCount is still a valid index.
            return \abs($offset) <= $charCount;
        }

        return $offset < $charCount;
    }
6005
6006
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the same encoding that is used to read the character.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6035
6036
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: UTF-8</p>
     *
     * @throws \InvalidArgumentException if $pad_type is a non-integer value other than 'left', 'right' or 'both'
     *
     * @return string <p>The padded string. An empty input string is returned as an empty
     *                string; the input is returned unchanged when $pad_length is not
     *                larger than zero, is smaller than the string length, or when
     *                $pad_string is empty.</p>
     */
    public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = \STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        $str_length = (int) self::strlen($str, $encoding);

        if (
            $pad_length > 0
            &&
            $pad_length >= $str_length
        ) {
            $ps_length = (int) self::strlen($pad_string, $encoding);

            // Nothing to pad with: bail out before dividing by the pad-string length.
            if ($ps_length === 0) {
                return $str;
            }

            $diff = ($pad_length - $str_length);

            // Repeat the pad string often enough to cover the gap, then cut it to the exact size.
            switch ($pad_type) {
                case \STR_PAD_LEFT:
                    $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                    $pre = (string) self::substr($pre, 0, $diff, $encoding);
                    $post = '';

                    break;

                case \STR_PAD_BOTH:
                    // With an odd gap the extra character goes to the right-hand side.
                    $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                    $pre = (string) self::substr($pre, 0, (int) \floor($diff / 2), $encoding);
                    $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                    $post = (string) self::substr($post, 0, (int) \ceil($diff / 2), $encoding);

                    break;

                case \STR_PAD_RIGHT:
                default:
                    $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                    $post = (string) self::substr($post, 0, $diff, $encoding);
                    $pre = '';
            }

            return $pre . $str . $post;
        }

        return $str;
    }
6110
6111
    /**
     * Return a new string of the given length with padding applied on both
     * sides. Alias for pad() with a $padType of 'both'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        $missing = $length - (int) self::strlen($str, $encoding);

        // Split the gap in two; with an odd gap the right side gets the extra character.
        $left = (int) \floor($missing / 2);
        $right = (int) \ceil($missing / 2);

        return self::apply_padding($str, $left, $right, $padStr, $encoding);
    }
6128
6129
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for pad() with a $padType of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure the input in the requested encoding, as str_pad_both() does.
        $missing = $length - (int) self::strlen($str, $encoding);

        return self::apply_padding($str, $missing, 0, $padStr, $encoding);
    }
6144
6145
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for pad() with a $padType of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure the input in the requested encoding, as str_pad_both() does.
        $missing = $length - (int) self::strlen($str, $encoding);

        return self::apply_padding($str, 0, $missing, $padStr, $encoding);
    }
6160
6161
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           How many times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           Must be greater than or equal to 0; with 0 the
     *                           function returns an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6185
6186
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace every occurrence of the search value with the replacement value.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value to search for (the needle). An array may be
     *                       given to designate several needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The value that replaces each found needle. An array may
     *                       be given to designate several replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array to search and replace in (the haystack).
     *                       </p>
     *                       <p>
     *                       If subject is an array, every entry is processed and
     *                       an array is returned.
     *                       </p>
     * @param int   $count   [optional] If passed, receives the number of replacements performed
     *
     * @return mixed a string or an array with the replaced values
     */
    public static function str_replace($search, $replace, $subject, int &$count = null)
    {
        /** @psalm-suppress PossiblyNullArgument */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
6220
6221
    /**
     * Replace $search with $replacement when it occurs at the very start of the string.
     *
     * Special cases: with an empty $str and an empty $search the $replacement is
     * returned; with a non-empty $str and an empty $search the $replacement is
     * appended to the END of $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '' && $replacement === '') {
            return '';
        }

        if ($search === '') {
            // For an empty $str this yields just $replacement.
            return $str . $replacement;
        }

        // Byte-wise prefix comparison: true exactly when $str starts with $search.
        $searchLength = \strlen($search);
        if (\strncmp($str, $search, $searchLength) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $searchLength);
    }
6252
6253
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * Special cases: with an empty $search the $replacement is appended to $str;
     * a $search that is longer than $str can never match and leaves $str unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Check the length first: a longer $search would otherwise produce an offset
        // outside of $str (warning in PHP 7, ValueError in PHP 8).
        if (
            $searchLength <= \strlen($str)
            &&
            \substr($str, -$searchLength) === $search
        ) {
            $str = \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6284
6285
    /**
     * Replace only the first occurrence of "$search" with "$replace".
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement term.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string <p>The subject with the first match replaced, or the
     *                unchanged subject when "$search" is not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $position = self::strpos($subject, $search);

        // Nothing to replace.
        if ($position === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $position, $searchLength);
    }
6306
6307
    /**
     * Replace only the last occurrence of "$search" with "$replace".
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement term.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string <p>The subject with the last match replaced, or the
     *                unchanged subject when "$search" is not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(string $search, string $replace, string $subject): string
    {
        $position = self::strrpos($subject, $search);

        // Nothing to replace.
        if ($position === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /** @psalm-suppress InvalidReturnStatement */
        return self::substr_replace($subject, $replace, $position, $searchLength);
    }
6328
6329
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str <p>The input string</p>
     *
     * @return string <p>The shuffled string; an empty string for empty input.</p>
     */
    public static function str_shuffle(string $str): string
    {
        $length = (int) self::strlen($str);

        // Without this guard range(0, -1) would produce the bogus index list [0, -1].
        if ($length === 0) {
            return '';
        }

        // Shuffle the character positions, then rebuild the string in that order.
        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        $shuffledStr = '';
        foreach ($indexes as $i) {
            $tmpSubStr = self::substr($str, $i, 1);
            if ($tmpSubStr !== false) {
                $shuffledStr .= $tmpSubStr;
            }
        }

        return $shuffledStr;
    }
6354
6355
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string when a non-negative
     *                      <i>end</i> is not greater than <i>start</i>.</p><p>If <i>str</i> is
     *                      shorter than <i>start</i> characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
    {
        if ($end === null) {
            // No end given: the full length is enough to reach the end of the string.
            $length = (int) self::strlen($str, $encoding);
        } elseif ($end >= 0 && $end <= $start) {
            return '';
        } elseif ($end < 0) {
            // Negative end counts from the end; measure in the same encoding used for the cut.
            $length = (int) self::strlen($str, $encoding) + $end - $start;
        } else {
            $length = $end - $start;
        }

        return self::substr($str, $start, $length, $encoding);
    }
6384
6385
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: normalise whitespace, turn "-" into "_", put "_" in front of every
     * character matched by the boundary pattern (lower-casing non-numeric matches
     * and wrapping digits in "_" on both sides), turn whitespace runs into "_",
     * collapse repeated "_", and finally trim "_" and whitespace from both ends.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        $normalized = \str_replace('-', '_', self::normalize_whitespace($str));

        // NOTE(review): the "|" inside the character class is a literal pipe, so "|" is
        // matched as well — confirm whether that is intended.
        $withBoundaries = (string) \preg_replace_callback(
            '/([\d|A-Z])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $char = $matches[1];

                // A match that survives the int round-trip is a plain digit: isolate it.
                if ((string) (int) $char === $char) {
                    return '_' . $char . '_';
                }

                return '_' . self::strtolower($char, $encoding);
            },
            $normalized
        );

        $patterns = [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ];
        $replacements = [
            '_',
            '',
            '_',
        ];
        $collapsed = (string) \preg_replace($patterns, $replacements, $withBoundaries);

        // trim leading & trailing "_" first, then leading & trailing whitespace
        return self::trim(self::trim($collapsed, '_'));
    }
6436
6437
    /**
     * Sort all characters according to code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses duplicate code points (values become keys and back)
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
6462
6463
    /**
     * alias for "UTF8::split()"
     *
     * Forwards both arguments unchanged to UTF8::split() and returns its result.
     *
     * @see UTF8::split()
     *
     * @param string|string[] $str
     * @param int             $len
     *
     * @return string[]
     */
    public static function str_split($str, int $len = 1): array
    {
        $chunks = self::split($str, $len);

        return $chunks;
    }
6477
6478
    /**
     * Splits the string at every occurrence of $pattern and returns the
     * pieces as an array of strings. An optional integer $limit will truncate
     * the results.
     *
     * The pattern is escaped with preg_quote() before it is used, so it is
     * matched as literal text.
     *
     * @param string $str
     * @param string $pattern <p>The delimiter with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        // an empty pattern cannot split anything: hand back the input as the only piece
        if ($pattern === '') {
            return [$str];
        }

        // preg_split() puts the remaining unsplit string into the last slot when a
        // limit is supplied, so ask for one extra piece and drop it afterwards
        $splitLimit = $limit > 0 ? $limit + 1 : -1;

        $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);
        if ($pieces === false) {
            return [];
        }

        if ($splitLimit > 0 && \count($pieces) === $splitLimit) {
            \array_pop($pieces);
        }

        return $pieces;
    }
6521
6522
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty haystack or an
     * empty needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // compare only the leading bytes instead of searching the whole haystack
        // for the needle and then checking whether it was found at offset 0
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
6538
6539
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * An empty input string or an empty list of substrings always yields false.
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so no reference is needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6567
6568
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if either argument is empty or the separator
     * is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): the separator is located without passing $encoding, while the
        // strlen()/substr() calls below do receive it - confirm this is intended.
        $separatorPos = self::str_index_first($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // start right behind the separator and take everything up to the end
        $startPos = $separatorPos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $startPos, null, $encoding);
    }
6599
6600
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * Returns an empty string if either argument is empty or the separator
     * is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): the separator is located without passing $encoding, while the
        // strlen()/substr() calls below do receive it - confirm this is intended.
        $separatorPos = self::str_index_last($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // start right behind the separator and take everything up to the end
        $startPos = $separatorPos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $startPos, null, $encoding);
    }
6631
6632
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * Returns an empty string if either argument is empty or the separator
     * is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): the separator is located without passing $encoding, while the
        // substr() call below does receive it - confirm this is intended.
        $separatorPos = self::str_index_first($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // everything from the beginning up to (not including) the separator
        return (string) self::substr($str, 0, $separatorPos, $encoding);
    }
6663
6664
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string if either argument is empty or the separator
     * is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // NOTE(review): the separator is located without passing $encoding, while the
        // substr() call below does receive it - confirm this is intended.
        $separatorPos = self::str_index_last($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // everything from the beginning up to (not including) the separator
        return (string) self::substr($str, 0, $separatorPos, $encoding);
    }
6695
6696
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The lookup is delegated to UTF8::strstr(); when that reports no match
     * (false), or when either input is empty, an empty string is returned.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6728
6729
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The lookup is delegated to UTF8::strrchr(); when that reports no match
     * (false), or when either input is empty, an empty string is returned.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6756
6757
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return $substring . $str . $substring;
    }
6769
6770
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * The string is cut into pieces via UTF8::str_to_words(); every piece that
     * is not listed in $ignore is lower-cased and then gets its first character
     * upper-cased, and finally all pieces are glued back together.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($useTrimFirst === true) {
            $str = self::trim($str);
        }

        $pieces = self::str_to_words($str);

        foreach ($pieces as $index => $piece) {
            // pieces on the ignore list are kept exactly as they were written
            if ($ignore && \in_array($piece, $ignore, true)) {
                continue;
            }

            $lowered = self::strtolower(
                $piece,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );

            $pieces[$index] = self::str_upper_first(
                $lowered,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );
        }

        return \implode('', $pieces);
    }
6825
6826
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * The input is trimmed and, if it contains no lower-case character at all,
     * lower-cased first. Five regex passes follow: the main substitution
     * (keep paths/URLs/emails, lower-case small words, capitalize plain words,
     * keep words with internal capitals), then the exceptions that capitalize
     * small words at the start of a (sub)title, at the end of a (sub)title,
     * and inside two-part hyphenated compounds.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The entries are appended to the
     *                         built-in small-word list and inserted into the regular expressions as
     *                         alternatives without escaping.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = self::trim($str);

        // an all-caps (or caseless) title carries no casing information worth keeping;
        // lower-case it in the caller's encoding, like every other case operation below
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        /**
         * Upper-cases the first character of capture group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstOfGroupOne = static function (array $matches) use ($encoding): string {
            return self::str_upper_first($matches[1], $encoding);
        };

        /**
         * Keeps capture group 1 and upper-cases the first character of capture group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstOfGroupTwo = static function (array $matches) use ($encoding): string {
            return $matches[1] . self::str_upper_first($matches[2], $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            $upperFirstOfGroupTwo,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            $upperFirstOfGroupOne,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $upperFirstOfGroupOne,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            $upperFirstOfGroupTwo,
            $str
        );

        return $str;
    }
6995
6996
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big-endian number and written
     * in base 2 without leading zeros ("0" for an empty or all-zero input).
     * The conversion works digit by digit, so it stays exact for inputs of
     * any length.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        // same hex digits as unpack('H*', ...) produces, but never fails
        $hex = \bin2hex($str);

        // translate each hex digit into its 4 bits; converting the whole value at once
        // with base_convert() goes through a float and corrupts inputs beyond ~53 bits
        $bits = '';
        $hexLength = \strlen($hex);
        for ($i = 0; $i < $hexLength; ++$i) {
            $bits .= \str_pad(\decbin((int) \hexdec($hex[$i])), 4, '0', \STR_PAD_LEFT);
        }

        // keep the established output format: no leading zeros, "0" for a zero value
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7009
7010
    /**
     * Split a string into its lines.
     *
     * The string is split at every run of one or two "\r" / "\n" characters.
     * When filtering is requested, the result is passed through
     * UTF8::reduce_string_array().
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "there are no lines": nothing at all, or one empty line
        $noLines = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $noLines;
        }

        // NOTE(review): "{1,2}" also consumes "\n\n" as one separator, so a blank line
        // between two lines does not show up as an empty entry - confirm this is intended.
        $lines = \preg_split("/[\r\n]{1,2}/u", $str);
        if ($lines === false) {
            return $noLines;
        }

        // no filtering requested: return the raw split result
        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $lines;
        }

        return self::reduce_string_array(
            $lines,
            $removeEmptyValues,
            $removeShortValues
        );
    }
7042
7043
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the returned array
     * holds the words as well as the text found between them. When filtering
     * is requested, the result is passed through UTF8::reduce_string_array()
     * and every remaining entry is cast to string.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "there are no words": nothing at all, or one empty entry
        $noWords = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $noWords;
        }

        $charList = self::rxClass($charList, '\pL');

        // a word: one or more word chars, optionally continued by dash/apostrophe + word chars
        $pieces = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $noWords;
        }

        // no filtering requested: return the raw split result
        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $pieces;
        }

        $filtered = self::reduce_string_array(
            $pieces,
            $removeEmptyValues,
            $removeShortValues
        );

        // make sure every entry really is a string (keys are kept)
        foreach ($filtered as $key => $value) {
            $filtered[$key] = (string) $value;
        }

        return $filtered;
    }
7086
7087
    /**
     * alias for "UTF8::to_ascii()"
     *
     * Forwards all arguments unchanged to UTF8::to_ascii() and returns its result.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7102
7103
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is longer than a non-negative $length it cannot
     * fit; the string is then truncated to $length and nothing is appended.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        // short enough already: nothing to cut, nothing to append
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        $substringLength = (int) self::strlen($substring, $encoding);

        // Subtracting a longer substring would make the length negative, which substr()
        // reads as "cut that many chars off the end" - the result would then be longer
        // than requested. The substring cannot fit, so truncate without appending it.
        if ($length >= 0 && $substringLength > $length) {
            return (string) self::substr($str, 0, $length, $encoding);
        }

        // Need to further trim the string so we can append the substring
        $length -= $substringLength;

        return ((string) self::substr($str, 0, $length, $encoding)) . $substring;
    }
7134
7135
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * If the substring alone is longer than a non-negative $length it cannot
     * fit; the string is then truncated word-safe to $length and nothing is
     * appended.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: UTF-8</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        // short enough already: nothing to cut, nothing to append
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        $substringLength = (int) self::strlen($substring, $encoding);

        // Subtracting a longer substring would make the length negative, which substr()
        // reads as "cut that many chars off the end" - the result would then be longer
        // than requested. The substring cannot fit, so run the same word-safe
        // truncation without it (an empty substring never re-enters this branch).
        if ($length >= 0 && $substringLength > $length) {
            return self::str_truncate_safe($str, $length, '', $encoding, $ignoreDoNotSplitWordsForOneWord);
        }

        // need to further trim the string so we can append the substring
        $length -= $substringLength;

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // if the last word was truncated
        $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
        if ($strPosSpace !== $length) {
            // find pos of the last occurrence of a space, get up to that
            $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

            if (
                $lastPos !== false
                ||
                ($strPosSpace !== false && $ignoreDoNotSplitWordsForOneWord === false)
            ) {
                // with no space inside $truncated, (int) false === 0 drops the partial word entirely
                $truncated = (string) self::substr($truncated, 0, (int) $lastPos, $encoding);
            }
        }

        return $truncated . $substring;
    }
7187
7188
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * Implemented as UTF8::str_delimit() with "_" as the delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
7202
7203
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized via UTF8::str_camelize() (which receives only
     * $str and $encoding) and its first character is then upper-cased via
     * UTF8::str_upper_first() using all remaining options.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $camelized = self::str_camelize($str, $encoding);

        return self::str_upper_first(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
7220
7221
    /**
     * Alias of "UTF8::ucfirst()": every argument is forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Passed through to UTF8::ucfirst().</p>
     * @param string|null $lang                  [optional] <p>Passed through to UTF8::ucfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Passed through to UTF8::ucfirst().</p>
     *
     * @return string
     */
    public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
7238
7239
    /**
     * Counts the words of a UTF-8 string, or returns the words themselves.
     *
     * The string is split by UTF8::str_to_words(); this method reads the
     * words from the odd indexes of that result and treats the even indexes
     * as the text between them.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string, or the words (see $format)
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // Any format other than 1 or 2 yields the plain word count.
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($idx = 1; $idx < $partCount; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // $format === 2: key every word by its character offset.
        $position = (int) self::strlen($parts[0]);
        for ($idx = 1; $idx < $partCount; $idx += 2) {
            $words[$position] = $parts[$idx];
            // Advance past the word itself and the part that follows it.
            $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
        }

        return $words;
    }
7276
7277
    /**
     * Compares two strings without regard to case.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are
     * case-folded with UTF8::strtocasefold() before being compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
7298
7299
    /**
     * Alias of "UTF8::strstr()": every argument is forwarded unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Passed through to UTF8::strstr().</p>
     * @param string $encoding      [optional] <p>Passed through to UTF8::strstr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Passed through to UTF8::strstr().</p>
     *
     * @return false|string
     */
    public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
7316
7317
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings short-circuit to 0. Otherwise both strings are
     * normalized to NFD and compared byte-wise with \strcmp(), so that
     * canonically equivalent sequences compare as equal.
     *
     * If a string cannot be normalized (\Normalizer::normalize() returns
     * false, e.g. for invalid UTF-8), its raw bytes are compared instead;
     * passing false on to \strcmp() would raise a TypeError under
     * strict_types.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @noinspection PhpUndefinedClassInspection */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @noinspection PhpUndefinedClassInspection */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }
7336
7337
    /**
     * Find length of initial segment not matching mask.
     *
     * Returns the number of characters at the start of $str (or of the
     * $offset/$length slice of it) that come before the first character
     * contained in $charList.
     *
     * @param string   $str      <p>The string to examine.</p>
     * @param string   $charList <p>The characters to stop at. An empty list yields the full length of $str.</p>
     * @param int|null $offset   [optional] <p>Start of the slice to examine (null is treated as 0 when $length is given).</p>
     * @param int|null $length   [optional] <p>Length of the slice to examine.</p>
     *
     * @return int
     */
    public static function strcspn(string $str, string $charList, int $offset = null, int $length = null): int
    {
        if ($charList === '') {
            return (int) self::strlen($str);
        }

        $restrictToSlice = $offset !== null || $length !== null;
        if ($restrictToSlice) {
            /** @noinspection UnnecessaryCastingInspection */
            $slice = self::substr($str, (int) $offset, $length);
            if ($slice === false) {
                return 0;
            }
            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // Lazily capture everything in front of the first char from the list.
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // None of the listed chars occurs: the whole string qualifies.
            return (int) self::strlen($str);
        }

        $prefixLength = self::strlen($found[1]);

        return $prefixLength === false ? 0 : $prefixLength;
    }
7378
7379
    /**
     * Alias of "UTF8::stristr()": every argument is forwarded unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Passed through to UTF8::stristr().</p>
     * @param string $encoding      [optional] <p>Passed through to UTF8::stristr().</p>
     * @param bool   $cleanUtf8     [optional] <p>Passed through to UTF8::stristr().</p>
     *
     * @return false|string
     */
    public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
7396
7397
    /**
     * Create a UTF-8 string from code points.
     *
     * Each element is converted with UTF8::chr() and the results are
     * concatenated in array order.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $result = '';

        foreach ($array as $codePoint) {
            $result .= self::chr($codePoint);
        }

        return $result;
    }
7419
7420
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is tested against every key of the self::$BOM table; only
     * the keys (the BOM byte sequences) are needed, so the table is iterated
     * by value and never touched by reference.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        foreach (\array_keys(self::$BOM) as $bomString) {
            // Array keys may be turned into ints by PHP; keep strpos() happy under strict_types.
            if (\strpos($str, (string) $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
7440
7441
    /**
     * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        // null means "no allowed tags"; the native function gets an empty string then.
        /** @noinspection UnnecessaryCastingInspection */
        $allowed = (string) $allowable_tags;

        return \strip_tags($input, $allowed);
    }
7474
7475
    /**
     * Removes every whitespace character from the string.
     *
     * This covers tabs and newline characters, as well as multibyte
     * whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string without whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
7492
7493
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * The search tries mbstring first, then intl (UTF-8 and non-negative
     * offsets only), then the native byte function for pure ASCII input and
     * finally a case-folded UTF8::strpos(). A backend that reports "not
     * found" lets the next one try.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $encodingIsCanonical = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$encodingIsCanonical) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // first choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $mbPosition = \mb_stripos($haystack, $needle, $offset, $encoding);
            if ($mbPosition !== false) {
                return $mbPosition;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_stripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $graphemePosition = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePosition !== false) {
                return $graphemePosition;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
7566
7567
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * The search is case-insensitive. mbstring is used when available;
     * otherwise intl (UTF-8 only), the native byte function for pure ASCII
     * input, and finally a regular expression are tried in that order.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare against '' explicitly:
        // a loose "!$needle" check would also treat the valid needle "0" as
        // empty and wrongly hand back the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        if (self::is_ascii($needle) && self::is_ascii($haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // Last resort: lazily capture everything in front of the first
        // case-insensitive match of the (quoted) needle.
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // Skip as many characters as precede the match.
        return self::substr($haystack, (int) self::strlen($match[1]));
    }
7645
7646
    /**
     * Get the string length, not the byte-length!
     *
     * "CP850" and "ASCII" are measured in bytes. For every other encoding the
     * backends are tried in this order: mbstring, iconv, intl (UTF-8 only),
     * the native byte length for pure ASCII input, and finally a regular
     * expression that counts the characters.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        $encodingIsCanonical = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$encodingIsCanonical) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // binary || ascii only: the byte length is the answer
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strlen_in_byte($str);
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // first choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $mbLength = \mb_strlen($str, $encoding);
            if ($mbLength !== false) {
                return $mbLength;
            }
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any single char"
        //

        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        // No match on a non-empty string is reported as failure.
        return $charCount === 0 ? false : $charCount;
    }
7763
7764
    /**
     * Get string length in byte.
     *
     * When the "mbstring_func_overload" support flag is set, the length is
     * taken via mb_strlen() with the 8-bit "CP850" charset; otherwise the
     * native \strlen() is used.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strlen($str, 'CP850'); // 8-BIT
    }
7788
7789
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both strings are
     * case-folded with UTF8::strtocasefold() and then handed to
     * UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
7810
7811
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp(). Identical strings
     * short-circuit to 0; otherwise both are passed through
     * UTF8::strtonatfold() before the native comparison.
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 . '' === $str2 . '') {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
7830
7831
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded with UTF8::strtocasefold() and then
     * compared via UTF8::strncmp().
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
7854
7855
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut to their first $len characters with
     * UTF8::substr() and the results are compared via UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     * @param int    $len  <p>Number of characters to use in the comparison.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(string $str1, string $str2, int $len): int
    {
        $head1 = (string) self::substr($str1, 0, $len);
        $head2 = (string) self::substr($str2, 0, $len);

        return self::strcmp($head1, $head2);
    }
7876
7877
    /**
     * Search a string for any of a set of characters.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // Locate the matched text by bytes and return everything from there on.
        $bytePosition = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $bytePosition);
    }
7899
7900
    /**
     * Find position of first occurrence of string in a string.
     *
     * "CP850" and "ASCII" are searched byte-wise. For every other encoding
     * the backends are tried in this order: mbstring, intl (UTF-8 and
     * non-negative offsets only), iconv (non-negative offsets only), the
     * native byte function for pure ASCII input, and finally a manual search
     * on the sliced haystack. A backend that reports "not found" lets the
     * next one try.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strpos_in_byte($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // Turn an offset counted from the end into the equivalent absolute
            // character position, so that the result below is relative to the
            // start of the haystack and not to the start of the sliced tail.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        if ($haystackIsAscii) {
            $haystackTmp = \substr($haystack, $offset);
        } else {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        }
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // Byte search inside the tail that starts at $offset.
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // Convert the byte position into a character count and re-add the skipped part.
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset + 0;
    }
8050
8051
    /**
     * Byte-wise search for the first occurrence of a string inside another string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or
     *                   needle is an empty string, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // fill the feature-detection cache on first use
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        $useMultiByteFunction = self::$SUPPORT['mbstring_func_overload'] === true;

        return $useMultiByteFunction
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
8084
8085
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * INFO: the iconv and vanilla php fallbacks only look at the first character of needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isByteEncoding = \in_array($encoding, ['CP850', 'ASCII'], true);
        if ($isByteEncoding && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both remaining strategies search for the first character of the needle only
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        // locate the last occurrence: iconv if available, otherwise our own strrpos()
        $lastPos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8198
8199
    /**
     * Reverses characters order in the string.
     *
     * The string is split into code points once and the resulting list is reversed,
     * so the work grows linearly with the input instead of doing one "substr()" call
     * per character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // fast path: "//u" splits between every code point, this only works for valid UTF-8
        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return \implode('', \array_reverse($chars));
        }

        // fallback: PCRE rejected the input (e.g. invalid UTF-8),
        // so walk the string backwards character by character as before
        $reversed = '';
        $i = (int) self::strlen($str);
        while ($i--) {
            $reversedTmp = self::substr($str, $i, 1);
            if ($reversedTmp !== false) {
                $reversed .= $reversedTmp;
            }
        }

        return $reversed;
    }
8223
8224
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * INFO: the vanilla php fallback only looks at the first character of needle.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is searched for
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8294
8295
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * Strategy order: byte-wise search for "CP850" / "ASCII", then mbstring, then intl
     * (UTF-8 and non-negative offset only), then a byte-wise search for pure ascii input,
     * and finally case-folding both strings and delegating to UTF8::strrpos().
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        $intlUsable = $encoding === 'UTF-8'
                      && $offset >= 0
                      && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both sides, then run the case-sensitive search
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
8400
8401
    /**
     * Byte-wise search for the position of the last occurrence of a string within another,
     * case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if haystack or
     *                   needle is an empty string
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // fill the feature-detection cache on first use
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        $useMultiByteFunction = self::$SUPPORT['mbstring_func_overload'] === true;

        return $useMultiByteFunction
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
8436
8437
    /**
     * Find position of last occurrence of a string in a string.
     *
     * Strategy order: byte-wise search for "CP850" / "ASCII", then mbstring, then intl
     * (UTF-8 and non-negative offset only), then a byte-wise search for pure ascii input,
     * and finally a vanilla php fallback that searches byte-wise and converts the byte
     * position into a character position.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        // INFO: $offset is a typed int, so it needs no null-check here
        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // cut the haystack down to the part that should be searched,
        // using the requested encoding (same as the UTF8::strpos() fallback)
        $haystackTmp = null;
        if ($offset > 0) {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position of the last match inside the (maybe shortened) haystack
        $pos = self::strrpos_in_byte($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // defensive check, kept from the original code
        $strTmp = self::substr_in_byte($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // the character length of everything before the match is the character position
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
8577
8578
    /**
     * Byte-wise search for the position of the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or
     *                   needle is an empty string, it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // fill the feature-detection cache on first use
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        $useMultiByteFunction = self::$SUPPORT['mbstring_func_overload'] === true;

        return $useMultiByteFunction
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
8612
8613
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str    <p>The input string.</p>
     * @param string $mask   <p>The mask of chars</p>
     * @param int    $offset [optional] <p>Start of the part of $str that is checked.</p>
     * @param int    $length [optional] <p>Length of the part of $str that is checked.</p>
     *
     * @return false|int
     *                   The length of the matching initial segment,<br>0 if $str (after slicing) or $mask is empty
     *                   or nothing matches
     */
    public static function strspn(string $str, string $mask, int $offset = 0, int $length = null)
    {
        // only slice the input if the caller asked for a sub-range
        $sliceRequested = $offset !== 0 || $length !== null;
        if ($sliceRequested) {
            $str = (string) self::substr($str, $offset, $length);
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match one or more mask characters, anchored at the start of the string
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0]);
    }
8638
8639
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * Strategy order: byte-wise search for "CP850" / "ASCII", then mbstring, then intl
     * (UTF-8 only), then a byte-wise search for pure ascii input, and finally a
     * regex-based vanilla php fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if (self::$SUPPORT['intl'] === true && $encoding === 'UTF-8') {
            $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($graphemeResult !== false) {
                return $graphemeResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $match);

        $beforeNeedle = $match[1] ?? null;
        if ($beforeNeedle === null) {
            return false;
        }

        if ($before_needle) {
            return $beforeNeedle;
        }

        // skip the captured prefix, the rest starts with the needle
        $prefixLength = (int) self::strlen($beforeNeedle);

        return self::substr($haystack, $prefixLength);
    }
8742
8743
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end,
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found or if haystack or needle
     *                      is an empty string
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // fill the feature-detection cache on first use
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        $useMultiByteFunction = self::$SUPPORT['mbstring_func_overload'] === true;

        return $useMultiByteFunction
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
8782
8783
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through UTF8::fixStrCaseHelper() and is then converted
     * via UTF8::strtolower() or UTF8::strtoupper(), depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        // pre-process the special case folding chars before the real case conversion
        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // only a strict "true" selects lowercase, everything else ends up uppercase
        return $lower === true
            ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
    }
8821
8822
    /**
     * Make a string lowercase.
     *
     * <p>If a language is given and "intl" is available, the conversion is done via the
     * "<lang>-Lower" transliterator ("Any-Lower" if that id is unknown). In every other
     * case, and whenever the transliterator fails, "mb_strtolower()" is used.</p>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the string is handed to intl / mbstring
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Lower';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);

                // "transliterator_transliterate()" returns false on failure, returning that
                // would violate the declared "string" return type, so fall through instead
                if ($transliterated !== false) {
                    return $transliterated;
                }
            } else {
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
8883
8884
    /**
     * Make a string uppercase.
     *
     * <p>If a language is given and "intl" is available, the conversion is done via the
     * "<lang>-Upper" transliterator ("Any-Upper" if that id is unknown). In every other
     * case, and whenever the transliterator fails, "mb_strtoupper()" is used.</p>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the string is handed to intl / mbstring
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available in this branch, only the language specific id is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);

                // "transliterator_transliterate()" returns false on failure, returning that
                // would violate the declared "string" return type, so fall through instead
                if ($transliterated !== false) {
                    return $transliterated;
                }
            } else {
                \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
8945
8946
    /**
     * Translate characters or replace sub-strings.
     *
     * <p>With a non-empty $to, $from and $to are split via "UTF8::str_split()" and paired
     * position by position; surplus entries on the longer side are ignored.</p>
     * <p>With an empty $to, a string $from is removed from $str, while an array $from is
     * used as "search => replace" pairs.</p>
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to to.</p>
     *
     * @throws \InvalidArgumentException if $from and $to cannot be combined into replace pairs
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // "array_combine()" needs the same number of keys and values
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the inputs in their own variables, so that the error message
            // can still show them if "array_combine()" fails
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            $from = $pairs;
        }

        // empty $to + string $from: simply remove $from
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
8993
8994
    /**
     * Return the width of a string.
     *
     * <p>Uses "mb_strwidth()" if mbstring is available; otherwise the characters matched by the
     * wide-character ranges below count as two columns and every other character as one.</p>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCommonEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // preferred way: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strwidth($str, $encoding);
        }

        // vanilla php fallback: works on UTF-8 only
        $utf8 = $encoding === 'UTF-8' ? $str : self::encode('UTF-8', $str, false, $encoding);

        // strip the wide characters and let "preg_replace()" count them
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $utf8, -1, $wideCount);

        // two columns per wide char + one column per remaining char
        return (2 * $wideCount) + (int) self::strlen($narrowOnly, 'UTF-8');
    }
9044
9045
    /**
     * Get part of a string.
     *
     * <p>Tries, in this order: byte-wise extraction for "CP850" / "ASCII", mbstring, intl
     * (UTF-8 and non-negative offset only), iconv (non-negative length only), native
     * "substr()" for pure ASCII input and finally a pure PHP implementation.</p>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str       <p>The string being checked.</p>
     * @param int      $offset    <p>The first position used in str.</p>
     * @param int|null $length    [optional] <p>The maximum length of the returned string, null === up to the end.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>An empty string is returned if <i>offset</i>
     *                      lies behind the end of <i>str</i>; <b>FALSE</b> is returned if the length of
     *                      <i>str</i> cannot be determined.</p>
     */
    public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return '';
        }

        // Empty string
        if ($length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // Whole string
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::substr_in_byte($str, $offset, $length);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // "2147483647" fakes "until the end" without the need to calculate the string length
            $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
            if ($return !== false) {
                return $return;
            }
        }

        // otherwise we need the string-length and can't fake it via "2147483647"
        $str_length = 0;
        if ($offset !== 0 || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // Empty string ("$length === 0" was already handled above, so only null is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // Impossible
        if ($offset !== 0 && $offset > $str_length) {
            // "false" is the php native return type here,
            //  but we optimized this for performance ... see "2147483647" instead of "strlen"
            return '';
        }

        // from here on we always work with a concrete length
        $length = $length ?? (int) $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
9201
9202
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * <p>If an offset or a length is given, $str1 is cut to that portion first and $str2 is cut
     * to the same number of characters, afterwards both parts are compared.</p>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
    {
        $needsSlicing = $offset !== 0 || $length !== null;

        if ($needsSlicing) {
            // cut the main string first, then shorten the second one to the very same length
            $str1 = (string) self::substr($str1, $offset, $length);
            $comparedLength = (int) self::strlen($str1);
            $str2 = (string) self::substr($str2, 0, $comparedLength);
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2)
            : self::strcmp($str1, $str2);
    }
9237
9238
    /**
     * Count the number of substring occurrences.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring. It outputs a warning if the offset plus the length is
     *                            greater than the haystack length.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int this functions returns an integer or false if there isn't a string
     *                   (empty haystack or needle, or the haystack length cannot be determined)
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // normalize first, so that the length calculation below uses the requested encoding
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                // measure the haystack in the same encoding that is used for the extraction below
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            // only the requested portion of the haystack is searched
            $haystack = (string) self::substr($haystack, $offset, $length, $encoding);
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars, so that "mb_substr_count()" / the regex below see valid input
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: one match per occurrence
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
9325
9326
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * <p>Without mbstring function overloading the native "substr_count()" is used directly.
     * With overloading the haystack is cut via "UTF8::substr_in_byte()" first and then counted
     * via "mb_substr_count()" in an 8-bit encoding.</p>
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
    {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // no overloading: the native function already counts bytes
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            if ($length === null) {
                return \substr_count($haystack, $needle, $offset);
            }

            return \substr_count($haystack, $needle, $offset, $length);
        }

        // overloading is active: cut the requested portion by hand ...
        if ($offset || $length !== null) {
            if ($length === null) {
                // NOTE(review): "self::strlen()" is called without an encoding here, which looks
                // character-based inside a byte-oriented helper - confirm that this is intended
                $fullLength = self::strlen($haystack);
                if ($fullLength === false) {
                    return false;
                }
                $length = (int) $fullLength;
            }

            $isOutOfRange = ($length !== 0 && $offset !== 0) && ($length + $offset) <= 0;
            if (
                $isOutOfRange
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $portion = self::substr_in_byte($haystack, $offset, $length);
            $haystack = $portion === false ? '' : (string) $portion;
        }

        // ... and count via "mb_", which is available if overload is used
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
9403
9404
    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $caseSensitive to false.
     *
     * <p>For backward compatibility a non-boolean third argument is used as the encoding.</p>
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
    {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // only a fallback to prevent BC in the api ...
        /** @psalm-suppress RedundantConditionGivenDocblockType */
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        $haystack = $str;
        $needle = $substring;

        if (!$caseSensitive) {
            // bring both strings into the same (upper) case before counting
            $haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
            $needle = self::strtocasefold($needle, true, false, $encoding, null, false);
        }

        return (int) self::substr_count($haystack, $needle, 0, null, $encoding);
    }
9435
9436
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not start with it
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to remove from / nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) self::substr($haystack, $prefixLength);
    }
9460
9461
    /**
     * Get part of a string process in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string, null === up to the end.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, as returned by "mb_substr()" (with mbstring
     *                      function overloading) or the native "substr()".</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty input or zero length: the result is always empty
        if ($str === '' || $length === 0) {
            return '';
        }

        // no offset and no limit: the whole string is requested
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "null" is replaced by a very large length, so that everything up to the end is returned
        $maxLength = $length ?? 2147483647;

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $maxLength, 'CP850'); // 8-BIT
        }

        return \substr($str, $offset, $maxLength);
    }
9500
9501
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string the haystack without the suffix, or the unchanged haystack if it does not end with it
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to remove from / nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) self::substr($haystack, 0, $keepLength);
    }
9525
9526
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not start with it
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to remove from / nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) self::substr($haystack, $prefixLength);
    }
9550
9551
    /**
9552
     * Replace text within a portion of a string.
9553
     *
9554
     * source: https://gist.github.com/stemar/8287074
9555
     *
9556
     * @param string|string[] $str         <p>The input string or an array of stings.</p>
9557
     * @param string|string[] $replacement <p>The replacement string or an array of stings.</p>
9558
     * @param int|int[]       $offset      <p>
9559
     *                                     If start is positive, the replacing will begin at the start'th offset
9560
     *                                     into string.
9561
     *                                     <br><br>
9562
     *                                     If start is negative, the replacing will begin at the start'th character
9563
     *                                     from the end of string.
9564
     *                                     </p>
9565
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
9566
     *                                     portion of string which is to be replaced. If it is negative, it
9567
     *                                     represents the number of characters from the end of string at which to
9568
     *                                     stop replacing. If it is not given, then it will default to strlen(
9569
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
9570
     *                                     length is zero then this function will have the effect of inserting
9571
     *                                     replacement into string at the given start offset.</p>
9572
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
9573
     *
9574
     * @return string|string[] The result string is returned. If string is an array then array is returned.
9575
     */
9576
    public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
    {
        // Array mode: bring every other argument into "one entry per input string"
        // shape, then handle the strings one by one via a recursive call.
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement: cut a list down to $num entries, or repeat a scalar $num times
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: list entries that are not real integers fall back to 0
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: a missing length becomes 0 for every string; inside a list,
            // null becomes 0 and any other non-integer entry becomes $num
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    if ($valueTmpV2 !== null) {
                        $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                    } else {
                        $valueTmpV2 = 0;
                    }
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call: the closure forwards the requested encoding, so the
            // per-string calls do not silently fall back to the default encoding
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // Scalar mode: of a replacement list only the first entry is used
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        // ASCII input: the native byte-based function can be used directly
        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]; negative offsets count from the end
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length stops that many characters before the end,
            // a missing or too large length is replaced by the string length
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return (string) self::substr($str, 0, $offset, $encoding) .
                   $replacement .
                   (string) self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
        }

        // Fallback without mbstring: split both strings into characters and splice the lists
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
9696
9697
    /**
9698
     * Removes a suffix ($needle) from the end of the string ($haystack).
9699
     *
9700
     * @param string $haystack <p>The string to search in.</p>
9701
     * @param string $needle   <p>The substring to search for.</p>
9702
     *
9703
     * @return string return the sub-string
9704
     */
9705
    public static function substr_right(string $haystack, string $needle): string
    {
        // nothing can be stripped from an empty haystack
        if ($haystack === '') {
            return '';
        }

        // an empty needle, or a haystack that does not end with the needle, stays untouched
        if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the trailing needle
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) self::substr($haystack, 0, $keepLength);
    }
9721
9722
    /**
9723
     * Returns a case swapped version of the string.
9724
     *
9725
     * @param string $str       <p>The input string.</p>
9726
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9727
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9728
     *
9729
     * @return string each character's case swapped
9730
     */
9731
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        // the two encodings named here are passed through without normalization
        $encodingIsKnown = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($encodingIsKnown === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $lowered = self::strtolower($str, $encoding);
        $uppered = self::strtoupper($str, $encoding);

        // byte-wise string XOR: "lower ^ upper" marks the bits that differ between both
        // cases, and XOR-ing that mask onto the input flips exactly those bits
        return (string) ($lowered ^ $uppered ^ $str);
    }
9749
9750
    /**
9751
     * Checks whether symfony-polyfills are used.
9752
     *
9753
     * @return bool
9754
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
9755
     */
9756
    public static function symfony_polyfill_used(): bool
    {
        // a polyfill is in play when the function exists although the native extension is not loaded
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
9773
9774
    /**
9775
     * @param string $str
9776
     * @param int    $tabLength
9777
     *
9778
     * @return string
9779
     */
9780
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        // every tab is replaced by a run of $tabLength spaces
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $spaces, $str);
    }
9784
9785
    /**
9786
     * Converts the first character of each word in the string to uppercase
9787
     * and all other chars to lowercase.
9788
     *
9789
     * @param string      $str                   <p>The input string.</p>
9790
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
9791
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
9792
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
9793
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
9794
     *
9795
     * @return string string with all characters of $str being title-cased
9796
     */
9797
    public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // only encodings other than the two named here are normalized (with "UTF-8" as fallback)
        $encodingIsKnown = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($encodingIsKnown === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the actual work is delegated to "str_titleize()"
        return self::str_titleize(
            $str,
            null,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength,
            false
        );
    }
9805
9806
    /**
9807
     * alias for "UTF8::to_ascii()"
9808
     *
9809
     * @see        UTF8::to_ascii()
9810
     *
9811
     * @param string $str
9812
     * @param string $subst_chr
9813
     * @param bool   $strict
9814
     *
9815
     * @return string
9816
     *
9817
     * @deprecated <p>use "UTF8::to_ascii()"</p>
9818
     */
9819
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        // deprecated alias: forwards all arguments unchanged to "to_ascii()"
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
9823
9824
    /**
9825
     * alias for "UTF8::to_iso8859()"
9826
     *
9827
     * @see        UTF8::to_iso8859()
9828
     *
9829
     * @param string|string[] $str
9830
     *
9831
     * @return string|string[]
9832
     *
9833
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
9834
     */
9835
    public static function toIso8859($str)
    {
        // deprecated alias: forwards the argument unchanged to "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
9839
9840
    /**
9841
     * alias for "UTF8::to_latin1()"
9842
     *
9843
     * @see        UTF8::to_latin1()
9844
     *
9845
     * @param string|string[] $str
9846
     *
9847
     * @return string|string[]
9848
     *
9849
     * @deprecated <p>use "UTF8::to_latin1()"</p>
9850
     */
9851
    public static function toLatin1($str)
    {
        // deprecated alias: forwards the argument unchanged to "to_latin1()"
        $converted = self::to_latin1($str);

        return $converted;
    }
9855
9856
    /**
9857
     * alias for "UTF8::to_utf8()"
9858
     *
9859
     * @see        UTF8::to_utf8()
9860
     *
9861
     * @param string|string[] $str
9862
     *
9863
     * @return string|string[]
9864
     *
9865
     * @deprecated <p>use "UTF8::to_utf8()"</p>
9866
     */
9867
    public static function toUTF8($str)
    {
        // deprecated alias: forwards the argument unchanged to "to_utf8()"
        $converted = self::to_utf8($str);

        return $converted;
    }
9871
9872
    /**
9873
     * Convert a string into ASCII.
9874
     *
9875
     * @param string $str     <p>The input string.</p>
9876
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
9877
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
9878
     *                        performance</p>
9879
     *
9880
     * @return string
9881
     */
9882
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // transliteration tables, loaded lazily per "bank" (code point >> 8) and kept between calls
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if ($strict === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

                // "transliterator_transliterate()" returns false on failure: in that case
                // keep the cleaned input and let the table lookup below handle it
                if (\is_string($transliterated) === true) {
                    $str = $transliterated;

                    // check again, if we only have ASCII, now ...
                    if (self::is_ascii($str) === true) {
                        return $str;
                    }
                }
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into single characters
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // reset for every character, so that a character which cannot be decoded
            // never reuses the code point of the character before it
            $ord = null;

            $ordC1 = self::$ORD[$c[1]];

            // decode the code point from the lead byte and its continuation bytes
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // lead bytes 254 / 255 and anything that could not be decoded map to $unknown
            if ($ordC0 === 254 || $ordC0 === 255 || $ord === null) {
                $c = $unknown;

                continue;
            }

            // the high bits select the table ("bank"), the low 8 bits the entry inside it
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember missing tables as empty, so they are not looked up again
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {
                // no table entry for this character
                $c = $unknown;
            }
        }
        // drop the dangling reference left behind by the by-reference loop
        unset($c);

        return \implode('', $chars);
    }
10033
10034
    /**
10035
     * @param mixed $str
10036
     *
10037
     * @return bool
10038
     */
10039
    public static function to_boolean($str): bool
    {
        // init
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keyword = \strtolower($value);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true) === true) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true) === true) {
            return false;
        }

        // any other numeric input is true only when it is greater than zero
        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        // everything else: truthiness of the trimmed string
        return (bool) self::trim($value);
    }
10073
10074
    /**
10075
     * Convert given string to safe filename (and keep string case).
10076
     *
10077
     * @param string $string
10078
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
10079
     *                                  simply replaced with hyphen.
10080
     * @param string $fallback_char
10081
     *
10082
     * @return string
10083
     */
10084
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                            // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty $fallback_char would produce the invalid pattern "/[]+/" and make
        // "preg_replace()" fail for the whole call, so step 3 only runs when there is
        // a char to collapse.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        if ($fallback_char === '') {
            // nothing to trim without a fallback char
            return $string;
        }

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
10109
10110
    /**
10111
     * Convert a string into "ISO-8859"-encoding (Latin-1).
10112
     *
10113
     * @param string|string[] $str
10114
     *
10115
     * @return string|string[]
10116
     */
10117
    public static function to_iso8859($str)
    {
        // arrays are converted entry by entry (recursively), keys are kept
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
10134
10135
    /**
10136
     * alias for "UTF8::to_iso8859()"
10137
     *
10138
     * @see UTF8::to_iso8859()
10139
     *
10140
     * @param string|string[] $str
10141
     *
10142
     * @return string|string[]
10143
     */
10144
    public static function to_latin1($str)
    {
        // alias: forwards the argument unchanged to "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
10148
10149
    /**
10150
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
10151
     *
10152
     * <ul>
10153
     * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
10154
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
10155
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
10156
     * case.</li>
10157
     * </ul>
10158
     *
10159
     * @param string|string[] $str                    <p>Any string or array.</p>
10160
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
10161
     *
10162
     * @return string|string[] the UTF-8 encoded string
10163
     */
10164
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays are converted entry by entry (recursively), keys are kept
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $max = self::strlen_in_byte($str);
        $buf = '';

        for ($i = 0; $i < $max; ++$i) {
            $lead = $str[$i];
            $leadOrd = \ord($lead);

            // 7-bit byte: it doesn't need conversion
            if ($leadOrd < 0x80) {
                $buf .= $lead;

                continue;
            }

            // sequence length the lead byte announces (0 === no 2-4 byte lead,
            // i.e. a stray continuation byte or a byte above 0xF7)
            if ($leadOrd >= 0xC0 && $leadOrd <= 0xDF) {
                $sequenceLength = 2;
            } elseif ($leadOrd >= 0xE0 && $leadOrd <= 0xEF) {
                $sequenceLength = 3;
            } elseif ($leadOrd >= 0xF0 && $leadOrd <= 0xF7) {
                $sequenceLength = 4;
            } else {
                $sequenceLength = 0;
            }

            // every announced follow-up byte must exist and be a continuation byte (0x80 - 0xBF)
            $sequence = $lead;
            $looksLikeUtf8 = $sequenceLength > 0;
            for ($offset = 1; $looksLikeUtf8 && $offset < $sequenceLength; ++$offset) {
                $position = $i + $offset;
                $byteOrd = $position >= $max ? 0 : \ord($str[$position]);

                if ($byteOrd >= 0x80 && $byteOrd <= 0xBF) {
                    $sequence .= $str[$position];
                } else {
                    $looksLikeUtf8 = false;
                }
            }

            if ($looksLikeUtf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence and skip its bytes
                $buf .= $sequence;
                $i += $sequenceLength - 1;
            } else {
                // not valid UTF8 - convert the single byte
                $buf .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences
        $buf = \preg_replace_callback(
            '/\\\\u([0-9a-f]{4})/i',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match): string {
                // always fallback via symfony polyfill
                return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
            },
            $buf
        );

        if ($buf === null) {
            return '';
        }

        // decode html-entities, if requested
        if ($decodeHtmlEntityToUtf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
10261
10262
    /**
10263
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
10264
     *
10265
     * INFO: This is slower than "trim()"
10266
     *
10267
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
10268
     * but the check for ASCII (7-Bit) costs more time than we can save here.
10269
     *
10270
     * @param string $str   <p>The string to be trimmed</p>
10271
     * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
10272
     *
10273
     * @return string the trimmed string
10274
     */
10275
    public static function trim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // No char list given (the \INF default or an empty value) => strip the default
        // character classes. The string "0" is falsy in PHP but it is a real char
        // list, so it must not be treated as "not given".
        $useDefaultChars = $chars === \INF || ($chars !== '0' && !$chars);

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultChars === true) {
            $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
        } else {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
10291
10292
    /**
10293
     * Makes string's first char uppercase.
10294
     *
10295
     * @param string      $str                   <p>The input string.</p>
10296
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
10297
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
10298
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
10299
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
10300
     *
10301
     * @return string the resulting string
10302
     */
10303
    public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // everything after the first character is kept as it is
        $remainder = (string) self::substr($str, 1, null, $encoding);

        // only the first character is upper-cased
        $firstChar = (string) self::substr($str, 0, 1, $encoding);
        $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $firstCharUpper . $remainder;
    }
10323
10324
    /**
10325
     * alias for "UTF8::ucfirst()"
10326
     *
10327
     * @see UTF8::ucfirst()
10328
     *
10329
     * @param string $str
10330
     * @param string $encoding
10331
     * @param bool   $cleanUtf8
10332
     *
10333
     * @return string
10334
     */
10335
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // alias: forwards all arguments unchanged to "ucfirst()"
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
10339
10340
    /**
10341
     * Uppercase for all words in the string.
10342
     *
10343
     * @param string   $str        <p>The input string.</p>
10344
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
10345
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
10346
     *                             word.</p>
10347
     * @param string   $encoding   [optional] <p>Set the charset.</p>
10348
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
10349
     *
10350
     * @return string
10351
     */
10352
    public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // strict check: the string "0" is falsy in PHP, but it is not an empty input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // without a charlist and without exceptions, pure ASCII input can use the native function
        $usePhpDefaultFunctions = !(bool) ($charlist . \implode('', $exceptions));

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // skip empty / falsy parts, they have nothing to uppercase
            if (!$word) {
                continue;
            }

            // words listed in $exceptions are left untouched
            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // drop the dangling reference left behind by the by-reference loop
        unset($word);

        return \implode('', $words);
    }
10396
10397
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>
     *                             true: decode as often as possible (until the string no longer changes),
     *                             false: run exactly one decoding pass.
     *                             </p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Rewrite non-standard "%uXXXX" escapes as hexadecimal HTML entities,
        // so that the entity decoding below can resolve them.
        $pattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always decode at least once; repeat only in multi-decode mode and only while a pass
        // still changes the string (this handles double / triple encoded input).
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
10449
10450
    /**
     * Returns an array with "urlencoded"-win1252 -> UTF-8.
     *
     * The keys are the percent-escapes "%20" ... "%FF" (upper-case hex, in ascending order),
     * the values are the matching characters as UTF-8 strings.
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[]
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        // 0x80 - 0x9F is the only range where Windows-1252 differs from ISO-8859-1,
        // so it is the only part that needs an explicit table.
        // Bytes that are unassigned in Windows-1252 (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to an empty string.
        $win1252Specials = [
            0x80 => '€',
            0x81 => '',
            0x82 => '‚',
            0x83 => 'ƒ',
            0x84 => '„',
            0x85 => '…',
            0x86 => '†',
            0x87 => '‡',
            0x88 => 'ˆ',
            0x89 => '‰',
            0x8A => 'Š',
            0x8B => '‹',
            0x8C => 'Œ',
            0x8D => '',
            0x8E => 'Ž',
            0x8F => '',
            0x90 => '',
            0x91 => '‘',
            0x92 => '’',
            0x93 => '“',
            0x94 => '”',
            0x95 => '•',
            0x96 => '–',
            0x97 => '—',
            0x98 => '˜',
            0x99 => '™',
            0x9A => 'š',
            0x9B => '›',
            0x9C => 'œ',
            0x9D => '',
            0x9E => 'ž',
            0x9F => 'Ÿ',
        ];

        $table = [];

        for ($byte = 0x20; $byte <= 0xFF; ++$byte) {
            if ($byte < 0x80) {
                // ASCII (incl. DEL, 0x7F): one byte, identical in UTF-8
                $char = \chr($byte);
            } elseif ($byte < 0xA0) {
                $char = $win1252Specials[$byte];
            } else {
                // 0xA0 - 0xFF equals U+00A0 - U+00FF, which is a two byte sequence in UTF-8.
                // Generating it avoids invisible literals (NBSP, soft hyphen) in the source file.
                $char = \chr(0xC0 | ($byte >> 6)) . \chr(0x80 | ($byte & 0x3F));
            }

            $table[\sprintf('%%%02X', $byte)] = $char;
        }

        return $table;
    }
10686
10687
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Two byte sequences with a code point below 256 are converted to a single byte,
     * every other multi byte sequence is replaced by "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the string as it was before the byte-wise decoding is returned
     *                              whenever the decoded result is not shorter than it (measured via self::strlen()).
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        static $UTF8_TO_WIN1252_KEYS_CACHE = null;
        static $UTF8_TO_WIN1252_VALUES_CACHE = null;

        if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        /** @noinspection PhpInternalEntityUsedInspection */
        $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // save for later comparision
        $str_backup = $str;
        $len = self::strlen_in_byte($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';

        // The string is rewritten in place: $i is the read position, $j the write position ($j <= $i).
        // The high nibble of the current byte tells how long the sequence is.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // lead byte of a two byte sequence
                    if ($i + 1 >= $len) {
                        // truncated input: the continuation byte is missing, so do not read past the end
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // lead byte of a four byte sequence: skip one byte more than for three bytes
                    ++$i;

                // no break

                case "\xE0":
                    // three (or four) byte sequence: never representable in one byte
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    // single byte: copy as it is
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes hold decoded output
        $return = self::substr_in_byte($str, 0, $j);
        // NOTE(review): static analysis reports this condition as always false; kept as a defensive fallback.
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
10775
10776
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * After the native conversion, the entries of the "win1252_to_utf8" data table are applied
     * to the result (search: table keys, replace: table values), but only if the
     * converted string contains a "\xC2" byte at all.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        // without a "\xC2" byte there is nothing the replacement table is applied to
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        // search / replace lists are derived from the data table only once per request
        static $searchCache = null;
        static $replaceCache = null;

        if ($searchCache === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
            $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($searchCache, $replaceCache, $encoded);
    }
10816
10817
    /**
     * fix -> utf8-win1252 chars
     *
     * This is only an alias: the string is passed unchanged to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
10830
10831
    /**
     * Returns an array with all utf8 whitespace characters.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E. [email protected]
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        // the table itself is kept in a static class property
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
10846
10847
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. If the string has more words than allowed,
     * it is cut after the last allowed word, right-trimmed and $strAddOn is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer; a value below 1 results in an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // optional leading whitespace, followed by at most $limit words incl. their trailing whitespace
        $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // shorten only if something matched and the match does not already cover the whole string
        if (
            isset($found[0])
            &&
            self::strlen($str) !== (int) self::strlen($found[0])
        ) {
            return self::rtrim($found[0]) . $strAddOn;
        }

        return $str;
    }
10878
10879
    /**
     * Wraps a string to a given number of characters
     *
     * How it works: the input is translated into a single-byte "mask" ("?" for each character,
     * " " for each space, "#" for each already existing break). The native wordwrap() runs on that mask
     * and the positions of the "#" in its result are then mapped back onto the real characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column
     *                (an empty string if $str or $break is empty).</p>
     */
    public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
    {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        $mask = '';
        $glyphs = [];

        foreach ($segments as $index => $segment) {
            // every segment but the first was preceded by a break in the input
            if ($index) {
                $glyphs[] = $break;
                $mask .= '#';
            }

            // the segment is not needed in the list anymore
            unset($segments[$index]);

            foreach (self::split($segment) as $glyph) {
                $glyphs[] = $glyph;
                $mask .= $glyph === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $glyphIndex = 0;
        $maskPos = -1;
        $breakPos = -1;

        while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {
            // copy (and consume) everything in front of the break position
            for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
                $wrapped .= $glyphs[$glyphIndex];
                unset($glyphs[$glyphIndex]);
                ++$glyphIndex;
            }

            // an original break or a space at the break position is replaced by the break,
            // any other character (forced cut) stays and the break is inserted in front of it
            if ($glyphs[$glyphIndex] === $break || $glyphs[$glyphIndex] === ' ') {
                unset($glyphs[$glyphIndex]);
                ++$glyphIndex;
            }

            $wrapped .= $break;
        }

        // whatever was not consumed belongs to the last line
        return $wrapped . \implode('', $glyphs);
    }
10944
10945
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split at "\r\n", "\r" and "\n"; every resulting line is wrapped separately
     * via the multi byte aware "UTF8::wordwrap()" and is terminated by "\n" in the result
     * (this includes the last line).
     *
     * @param string $str
     * @param int    $limit <p>The column width, counted in characters.</p>
     *
     * @return string
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            // use the class' own wordwrap(): the native function would count bytes instead of characters
            $wrapped .= self::wordwrap($line, $limit) . "\n";
        }

        return $wrapped;
    }
10969
10970
    /**
     * Returns an array of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        // the list itself is kept in a static class property
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
10979
10980
    /**
     * Adds the specified amount of left and right padding to the given string.
     * The default character used is a space.
     *
     * The work is delegated to "UTF8::str_pad()": the target length is always
     * "$left + $right + length of $str", only the pad type depends on which side was requested.
     *
     * @param string $str
     * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
     * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
     * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        $targetLength = $left + $right + (int) self::strlen($str, $encoding);

        if ($left !== 0 && $right === 0) {
            $padType = \STR_PAD_LEFT;
        } elseif ($left === 0 && $right !== 0) {
            $padType = \STR_PAD_RIGHT;
        } else {
            // both sides requested, or no padding at all
            $padType = \STR_PAD_BOTH;
        }

        return self::str_pad($str, $targetLength, $padStr, $padType, $encoding);
    }
11012
11013
    /**
     * Replaces characters via the case folding tables of this class.
     *
     * First the "common" table (self::$COMMON_CASE_FOLD) is applied, afterwards - only if requested -
     * the "full" table from the "caseFolding_full" data file.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // only a real boolean "true" switches to lowercase
        $toLower = $useLower === true;

        if ($toLower) {
            $search = $upper;
            $replace = $lower;
        } else {
            $search = $lower;
            $replace = $upper;
        }

        $str = (string) \str_replace($search, $replace, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // the data file is loaded only once per request
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 for lowercase, and the other way around for uppercase
        $searchIndex = $toLower ? 0 : 1;
        $replaceIndex = 1 - $searchIndex;

        return (string) \str_replace($FULL_CASE_FOLD[$searchIndex], $FULL_CASE_FOLD[$replaceIndex], $str);
    }
11054
11055
    /**
     * get data from "/data/*.php"
     *
     * The file is included without an existence check.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return mixed the value returned by the included file
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
11068
11069
    /**
     * get data from "/data/*.php", but only if the file exists
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed the value returned by the included file, false if the file does not exist
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        return include $path;
    }
11086
11087
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * INFO: the INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        // without the constant there is nothing that could be overloaded
        /** @noinspection PhpComposerExtensionStubsInspection */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $funcOverload = (int) @\ini_get('mbstring.func_overload');

        // is the "string functions" bit set in the configured bit mask?
        return (bool) ($funcOverload & \MB_OVERLOAD_STRING);
    }
11104
11105
    /**
     * Filters a list of strings; the remaining values are returned re-indexed, in their original order.
     *
     * @param array $strings
     * @param bool  $removeEmptyValues <p>Drop values which are empty after trim().</p>
     * @param int   $removeShortValues <p>
     *                                 If not null: drop values whose length (via self::strlen())
     *                                 is lower than or equal to this number.
     *                                 </p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $filterShortValues = $removeShortValues !== null;
        $kept = [];

        foreach ($strings as $candidate) {
            $isTooShort = $filterShortValues && self::strlen($candidate) <= $removeShortValues;
            if ($isTooShort) {
                continue;
            }

            $isBlank = $removeEmptyValues === true && \trim($candidate) === '';
            if ($isBlank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
11139
11140
    /**
     * rxClass
     *
     * Builds a regular expression fragment (for the delimiter "/") that matches any of the
     * characters of $s, in addition to what the raw character class content $class matches.
     * Results are cached per ($s, $class) combination.
     *
     * @param string $s     <p>The characters to match; they are split via self::str_split().</p>
     * @param string $class <p>Raw content for the character class, used as it is (not quoted).</p>
     *
     * @return string
     *                <p>A bracket expression like "[...]", or a group "(?:[...]|...)" if at least one element
     *                could not be put into the bracket expression.</p>
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASSS_CACHE = [];

        // Two-level cache: a concatenated key ($s . $class) would be ambiguous,
        // e.g. ("a", "^") and ("a^", "") would share the key "a^".
        if (isset($RX_CLASSS_CACHE[$s][$class])) {
            return $RX_CLASSS_CACHE[$s][$class];
        }

        // index 0 collects the content of the bracket expression, further entries are alternatives
        $classParts = [$class];

        // iterate by value and with an own variable: the result of a function call cannot be
        // used as reference target and the parameter $s is still needed as cache key
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen at the very beginning of a bracket expression is taken literally
                $classParts[0] = '-' . $classParts[0];
            } elseif (!isset($char[2])) {
                // less than three bytes: quote it, it may be a regex meta character
                $classParts[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character with three or more bytes
                $classParts[0] .= $char;
            } else {
                // more than one character: must become an alternative of its own
                $classParts[] = $char;
            }
        }

        // strict comparison, so that a class consisting of "0" only is wrapped like any other
        if ($classParts[0] !== '') {
            $classParts[0] = '[' . $classParts[0] . ']';
        }

        if (\count($classParts) === 1) {
            $return = $classParts[0];
        } else {
            $return = '(?:' . \implode('|', $classParts) . ')';
        }

        $RX_CLASSS_CACHE[$s][$class] = $return;

        return $return;
    }
11188
11189
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every
     * part, except for parts that are a listed particle ("von", "de", ...) or
     * that start with a listed prefix ("mc", "d'", ...). When the delimiter is
     * "-", a part that merely starts with a listed particle is skipped too.
     * The lists are lower-case and compared case-sensitively.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  <p>Encoding passed to the prefix lookup.</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        $namesArray = \explode($delimiter, $names);

        // explode() yields false for an empty delimiter on PHP < 8.
        if ($namesArray === false) {
            return '';
        }

        $particles = [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ];

        $prefixes = [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ];

        // For hyphenated names the particles count as prefixes as well.
        $beginnings = $delimiter === '-'
            ? \array_merge($particles, $prefixes)
            : $prefixes;

        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $particles, true)) {
                continue;
            }

            foreach ($beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // The first hit is enough, leave this part untouched.
                    continue 2;
                }
            }

            // NOTE(review): $encoding is not forwarded to str_upper_first() - confirm whether it should be.
            $namesArray[$index] = self::str_upper_first($name);
        }

        return \implode($delimiter, $namesArray);
    }
11279
11280
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * Decomposes the string (NFD) and then removes every non-spacing mark, so
     * that e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>The folded string, "" if the input cannot be normalized,
     *                     null if the regex replacement fails.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure (e.g. invalid UTF-8) with false; handing
        // that to preg_replace() is a TypeError under strict_types on PHP 8.
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
11292
11293
    /**
     * Convert one input unit to UTF-8 with the help of the lookup tables.
     *
     * The tables are loaded via getData() on first use. If the ordinal of the
     * input has an entry in the Windows-1252 table, that entry is returned;
     * otherwise a two-byte sequence is assembled from the ordinal.
     *
     * @param int|string $input <p>Key into the "ord" table; presumably a single byte - verify against caller.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // The "|" and "&" below are intentionally the byte-wise string operators.
        // Lead byte: the bits above the low six, tagged with 0xC0. The shift
        // keeps the array key an integer; "/ 64" produced a float key, which
        // is deprecated since PHP 8.1.
        $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
        // Continuation byte: the low six bits, tagged with 0x80.
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
11326
}
11327