Passed
Push — master ( e5ad93...8e64a6 )
by Lars
09:20 queued 03:11
created

UTF8::str_split_pattern()   B

Complexity

Conditions 11
Paths 12

Size

Total Lines 49
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 17.3808

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 11
eloc 25
c 1
b 0
f 0
nc 12
nop 3
dl 0
loc 49
ccs 15
cts 24
cp 0.625
crap 17.3808
rs 7.3166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
14
     * Bom => Byte-Length
15
     *
16
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
17
     *
18
     * @var array
19
     */
20
    private static $BOM = [
21
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
22
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
23
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
24
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
25
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
26
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
27
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
28
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
29
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
30
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
31
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        // HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
234
235
    /**
236
     * @var array|null
237
     */
238
    private static $CHR;
239
240
    /**
     * Creates an instance of the helper.
     *
     * The constructor body is intentionally empty: no state is set up here.
     */
    public function __construct()
    {
        // intentionally left blank
    }
246
247
    /**
     * Fetch a single character by position: multi-byte aware `$str[1]` like functionality.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the single (multi-byte) character, or an empty string when the input is empty
     *                or the position is negative
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing to look up
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other charset through the class wrapper
        return (string) (
            $encoding === 'UTF-8'
                ? \mb_substr($str, $pos, 1)
                : self::substr($str, $pos, 1, $encoding)
        );
    }
268
269
    /**
     * Makes sure the string starts with the UTF-8 BOM and returns the result.
     *
     * INFO: A string for which UTF8::string_has_bom() does not report false is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        // a BOM was detected: hand the input back untouched
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
286
287
    /**
     * Changes the case of all keys in an array.
     *
     * Keys are cast to string and converted via UTF8::strtolower() / UTF8::strtoupper();
     * the values are copied over unchanged. If two keys become equal after the
     * conversion, the later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default)</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array an array with its keys lower or uppercased
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // any unknown flag falls back to lower-casing
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: nothing is written back into $array, so a by-reference
        // loop would only force an array separation and leave a dangling reference
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
     * Extracts the text enclosed by two delimiters.
     *
     * The search for $start begins at $offset; $end is searched for behind the
     * found $start delimiter. An empty string is returned when either delimiter
     * is missing or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: normalize the charset name and use the class wrappers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $startPos = self::strpos($str, $start, $offset, $encoding);
            if ($startPos === false) {
                return '';
            }

            $from = $startPos + (int) self::strlen($start, $encoding);
            $endPos = self::strpos($str, $end, $from, $encoding);
            if ($endPos === false || $endPos === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $endPos - $from, $encoding);
        }

        // UTF-8 fast path: call mbstring directly
        $startPos = \mb_strpos($str, $start, $offset);
        if ($startPos === false) {
            return '';
        }

        // first character behind the start delimiter
        $from = $startPos + (int) \mb_strlen($start);
        $endPos = \mb_strpos($str, $end, $from);
        if ($endPos === false || $endPos === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $endPos - $from);
    }
382
383
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
     *
     * The digits are read as one big-endian bit stream. Leading zero bits are
     * dropped and the rest is left-padded to whole bytes, so "1010" and
     * "00001010" both produce "\x0a". Input that contains no "1" bit gives an
     * empty string, and characters other than "0" / "1" are ignored.
     *
     * The bits are converted byte by byte. The earlier
     * base_convert() + pack('H*') approach misaligned the nibbles whenever the
     * hex value had an odd number of digits and lost precision on long input.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        // empty input and anything without an offset 0 (e.g. an integer)
        if (!isset($bin[0])) {
            return '';
        }

        // keep the binary digits only and strip the insignificant leading zeros
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a multiple of 8 so that every chunk is exactly one byte
        $missing = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
403
404
    /**
     * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        return "\xef\xbb\xbf";
    }
415
416
    /**
     * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
430
431
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 is served by mbstring directly, other charsets by the class wrapper
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
448
449
    /**
     * Splits the string into its characters; a thin wrapper around UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $chars = self::str_split($str);

        return $chars;
    }
460
461
    /**
     * Detects which UTF-8 related extensions / features the server offers and
     * stores the results in self::$SUPPORT.
     *
     * The detection runs only once: later calls see the
     * "already_checked_via_portable_utf8" flag and return right away.
     *
     * @return true|null true when the detection was executed, null when it had already been done
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // already detected earlier
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch mbstring (and its regex functions) to UTF-8
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill provides "mb_internal_encoding()" as well
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
516
517
    /**
     * Generates a character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * The character is taken from one of three sources:
     * the self::$CHR lookup table for code points up to 127, \IntlChar::chr()
     * when self::$SUPPORT['intlChar'] is true, or otherwise a manual UTF-8 byte
     * composition. The result is converted via UTF8::encode() when $encoding is
     * not UTF-8 and memoized per code point + encoding.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // memo: "<code point><encoding>" => character
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // other charsets need mbstring for the conversion: warn, but carry on
        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cache_key = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $char = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            //
            // fallback via "IntlChar"
            //

            /** @noinspection PhpComposerExtensionStubsInspection */
            $char = \IntlChar::chr($code_point);
        } else {
            //
            // fallback via vanilla php
            //

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $table = self::$CHR;
            $cp = (int) $code_point;

            if ($cp <= 0x7F) {
                // one byte
                $char = $table[$cp];
            } elseif ($cp <= 0x7FF) {
                // two bytes: 110xxxxx 10xxxxxx
                $char = $table[($cp >> 6) + 0xC0]
                        . $table[($cp & 0x3F) + 0x80];
            } elseif ($cp <= 0xFFFF) {
                // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                $char = $table[($cp >> 12) + 0xE0]
                        . $table[(($cp >> 6) & 0x3F) + 0x80]
                        . $table[($cp & 0x3F) + 0x80];
            } else {
                // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $char = $table[($cp >> 18) + 0xF0]
                        . $table[(($cp >> 12) & 0x3F) + 0x80]
                        . $table[(($cp >> 6) & 0x3F) + 0x80]
                        . $table[($cp & 0x3F) + 0x80];
            }
        }

        // every source above yields UTF-8: convert only when another charset was requested
        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        return $CHAR_CACHE[$cache_key] = $char;
    }
629
630
    /**
     * Runs the callback once for every character of the string.
     *
     * The string is split via UTF8::str_split() and the pieces are passed to
     * array_map() together with the callback.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
645
646
    /**
     * Generates an array with the byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // pick the byte counter first, then split the string
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byte_length = static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_length = '\strlen';
        }

        return \array_map($byte_length, self::str_split($str));
    }
676
677
    /**
     * Get a decimal code representation of a specific character.
     *
     * The first byte decides how many bytes are read (1 to 4). The payload bits
     * of the lead byte and of the following bytes are then joined, six bits per
     * following byte.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $code = self::ord($char[0]);

        // 0xxxxxxx
        if (($code & 0x80) === 0) {
            return $code;
        }

        // [bits to test, expected marker, sequence length]
        $layouts = [
            [0xe0, 0xc0, 2], // 110xxxxx
            [0xf0, 0xe0, 3], // 1110xxxx
            [0xf8, 0xf0, 4], // 11110xxx
        ];

        $bytes = 1;
        foreach ($layouts as list($mask, $marker, $length)) {
            if (($code & $mask) === $marker) {
                $bytes = $length;
                // strip the length marker, keep the payload bits
                $code &= ~$marker;

                break;
            }
        }

        // 10xxxxxx
        for ($offset = 1; $offset < $bytes; ++$offset) {
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
715
716
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty input gives an empty string. The literal "&#0;" is replaced by
     * an empty string before it is handed to UTF8::ord().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the NUL entity is looked up as an empty string
        $input = $char === '&#0;' ? '' : (string) $char;
        $code_point = self::ord($input);

        return self::int_to_hex($code_point, $pfix);
    }
736
737
    /**
     * Alias of UTF8::chr_to_decimal(): the argument is passed through unchanged.
     *
     * @param string $chr
     *
     * @return int
     *
     * @see UTF8::chr_to_decimal()
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
750
751
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * INFO: the chunks come from UTF8::str_split() and are glued together with
     * implode(), so $end is placed between the chunks and not behind the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
764
765
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * Bytes that are not part of a well-formed UTF-8 sequence are always
     * dropped. The optional steps then run in this order: diamond question
     * mark, invisible characters, whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 captures runs of well-formed sequences; stray bytes match the
        // other two groups and vanish because only "$1" is written back
        $valid_sequences_rx = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $str = (string) \preg_replace($valid_sequences_rx, '$1', $str);

        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str, '');
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
833
834
    /**
     * Cleans up a string so that only printable UTF-8 chars are left + fixes the UTF-8 encoding.
     *
     * The input is cast to string, passed through UTF8::fix_simple_utf8() and
     * then through UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // leave MS Word chars alone
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
868
869
    /**
870
     * Accepts a string or a array of strings and returns an array of Unicode code points.
871
     *
872
     * INFO: opposite to UTF8::string()
873
     *
874
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
875
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
876
     *                                 default, code points will be returned as integers.</p>
877
     *
878
     * @return array<int|string>
879
     *                           The array of code points:<br>
880
     *                           array<int> for $u_style === false<br>
881
     *                           array<string> for $u_style === true<br>
882
     */
883 12
    public static function codepoints($arg, bool $u_style = false): array
    {
        // a plain string is split into its characters first,
        // an array is used as it is
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        // translate every entry into its code point via self::ord()
        $codePoints = \array_map([self::class, 'ord'], $chars);
        if ($codePoints === []) {
            return [];
        }

        // integer code points are the default result
        if ($u_style !== true) {
            return $codePoints;
        }

        // otherwise convert every code point via self::int_to_hex()
        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
913
914
    /**
915
     * Trims the string and replaces consecutive whitespace characters with a
916
     * single space. This includes tabs and newline characters, as well as
917
     * multibyte whitespace such as the thin space and ideographic space.
918
     *
919
     * @param string $str <p>The input string.</p>
920
     *
921
     * @return string string with a trimmed $str and condensed whitespace
922
     */
923 13
    public static function collapse_whitespace(string $str): string
    {
        // one or more whitespace characters in a row
        $whitespaceRun = '[[:space:]]+';

        // without mbstring we fall back to the regex helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, $whitespaceRun, ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace($whitespaceRun, ' ', $str);

        return \trim($collapsed);
    }
932
933
    /**
934
     * Returns count of characters used in a string.
935
     *
936
     * @param string $str                <p>The input string.</p>
937
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
938
     * @param bool   $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
939
     *
940
     * @return int[] an associative array of Character as keys and
941
     *               their count as values
942
     */
943 19
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // split into chunks of length 1 ...
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        // ... and count how often every chunk occurs
        return \array_count_values($characters);
    }
957
958
    /**
959
     * Remove css media-queries.
960
     *
961
     * @param string $str
962
     *
963
     * @return string
964
     */
965 1
    public static function css_stripe_media_queries(string $str): string
    {
        // matches an "@media ... { ... { ... } }" block; the "U" modifier makes
        // all quantifiers ungreedy, so only the nearest closing braces are consumed
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $withoutMediaQueries = \preg_replace($mediaQueryPattern, '', $str);

        // preg_replace() yields null on error, the cast turns that into ''
        return (string) $withoutMediaQueries;
    }
973
974
    /**
975
     * Checks whether ctype is available on the server.
976
     *
977
     * @return bool
978
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
979
     */
980
    public static function ctype_loaded(): bool
    {
        // name of the PHP extension we are looking for
        $extensionName = 'ctype';

        return \extension_loaded($extensionName);
    }
984
985
    /**
986
     * Converts a int-value into an UTF-8 character.
987
     *
988
     * @param mixed $int
989
     *
990
     * @return string
991
     */
992 19
    public static function decimal_to_chr($int): string
    {
        // build a decimal numeric character reference, e.g. "&#228;" ...
        $entity = '&#' . $int . ';';
        $flags = \ENT_QUOTES | \ENT_HTML5;

        // ... and let the entity decoder of this class resolve it
        return self::html_entity_decode($entity, $flags);
    }
996
997
    /**
998
     * Decodes a MIME header field
999
     *
1000
     * @param string $str
1001
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1002
     *
1003
     * @return false|string
1004
     *                      A decoded MIME field on success,
1005
     *                      or false if an error occurs during the decoding
1006
     */
1007
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as given, everything else gets normalized
        $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // preferred way: iconv, which keeps going on malformed parts
        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // fallback: mbstring; non UTF-8 targets are re-encoded before decoding
        $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($input);
    }
1023
1024
    /**
1025
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1026
     *
1027
     * @param string $str                        <p>The input string.</p>
1028
     * @param bool   $useReversibleStringMapping [optional] <p>
1029
     *                                           When <b>TRUE</b>, we use a reversible string mapping
1030
     *                                           between "emoji_encode" and "emoji_decode".</p>
1031
     *
1032
     * @return string
1033
     */
1034 9
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        // make sure the emoji lookup tables are available
        self::initEmojiData();

        // pick the key table that matches the requested mapping
        $encodedKeys = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // replace every encoded key with its emoji value
        return (string) \str_replace(
            (array) $encodedKeys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1052
1053
    /**
1054
     * Encode a string with emoji chars into a non-emoji string.
1055
     *
1056
     * @param string $str                        <p>The input string</p>
1057
     * @param bool   $useReversibleStringMapping [optional] <p>
1058
     *                                           when <b>TRUE</b>, we use a reversible string mapping
1059
     *                                           between "emoji_encode" and "emoji_decode"</p>
1060
     *
1061
     * @return string
1062
     */
1063 12
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        // make sure the emoji lookup tables are available
        self::initEmojiData();

        // pick the key table that matches the requested mapping
        $encodedKeys = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // replace every emoji value with its encoded key
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $encodedKeys,
            $str
        );
    }
1081
1082
    /**
1083
     * Encode a string with a new charset-encoding.
1084
     *
1085
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
1086
     *        so you can call this function also on a UTF-8 String and you don't mess the string.
1087
     *
1088
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1089
     * @param string $str                    <p>The input string</p>
1090
     * @param bool   $autodetectFromEncoding [optional] <p>Force the new encoding (we try to fix broken / double
1091
     *                                       encoding for UTF-8)<br> otherwise we auto-detect the current
1092
     *                                       string-encoding</p>
1093
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1094
     *                                       A empty string will trigger the autodetect anyway.</p>
1095
     *
1096
     * @return string
1097
     *
1098
     * @psalm-suppress InvalidReturnStatement
1099
     */
1100 28
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        // nothing to convert, or no target encoding given
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as given, everything else gets normalized
        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // source and target are explicitly the same encoding
        if (
            $toEncoding
            &&
            $fromEncoding
            &&
            $fromEncoding === $toEncoding
        ) {
            return $str;
        }

        // pseudo-encoding: JSON
        if ($toEncoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($fromEncoding === 'JSON') {
            // NOTE(review): the decoded value is used as a string below,
            // confirm that non-string JSON documents cannot reach this point
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // pseudo-encoding: BASE64
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                // strict decoding returns false for input outside of the base64
                // alphabet; there is nothing left to convert then, and passing
                // false on would violate the string types used below
                return '';
            }

            $str = $decoded;
            $fromEncoding = '';
        }

        // pseudo-encoding: HTML-ENTITIES
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the source encoding if requested, or if none is known
        $fromEncodingDetected = false;
        if (
            $autodetectFromEncoding === true
            ||
            !$fromEncoding
        ) {
            $fromEncodingDetected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

        if ($fromEncodingDetected !== false) {
            // a detected encoding wins over the given one
            $fromEncoding = $fromEncodingDetected;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // unknown source, or already in the target encoding
        if (
            !$fromEncoding
            ||
            $fromEncoding === $toEncoding
        ) {
            return $str;
        }

        // WINDOWS-1252 / ISO-8859-1 -> UTF-8 is handled by to_utf8()
        if (
            $toEncoding === 'UTF-8'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        // WINDOWS-1252 / UTF-8 -> ISO-8859-1 is handled by to_iso8859()
        if (
            $toEncoding === 'ISO-8859-1'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        // any other target encoding needs mbstring; warn, but still try iconv below
        if (
            $toEncoding !== 'UTF-8'
            &&
            $toEncoding !== 'ISO-8859-1'
            &&
            $toEncoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $strEncoded = \mb_convert_encoding(
                $str,
                $toEncoding,
                $fromEncoding
            );

            if ($strEncoded) {
                return $strEncoded;
            }
        }

        // last resort: iconv; the input is returned unchanged if that fails too
        $return = \iconv($fromEncoding, $toEncoding, $str);
        if ($return !== false) {
            return $return;
        }

        return $str;
    }
1240
1241
    /**
1242
     * @param string $str
1243
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
1244
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
1245
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
1246
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
1247
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
1248
     *
1249
     * @return false|string
1250
     *                      An encoded MIME field on success,
1251
     *                      or false if an error occurs during the encoding
1252
     */
1253
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // The default line break is a real CRLF. The previous default '\\r\\n'
        // was a single-quoted literal (backslash, "r", backslash, "n"), so folded
        // headers contained that text instead of an actual line break.

        // "UTF-8" and "CP850" are used as given, everything else gets normalized
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        // the field name is left empty, only the field value ($str) is encoded
        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transferEncoding,
                'line-length'      => $indent,
                'input-charset'    => $fromCharset,
                'output-charset'   => $toCharset,
                'line-break-chars' => $linefeed,
            ]
        );
    }
1281
1282
    /**
1283
     * Create an extract from a sentence; if the search-string was found, it tries to center it in the output.
1284
     *
1285
     * @param string   $str                    <p>The input string.</p>
1286
     * @param string   $search                 <p>The searched string.</p>
1287
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
1288
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
1289
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
1290
     *
1291
     * @return string
1292
     */
1293 1
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 uses the native "mb_" functions, every other
        // encoding goes through the wrappers of this class
        $isUtf8 = $encoding === 'UTF-8';

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // number of characters of a text
        $charCount = static function ($text) use ($isUtf8, $encoding): int {
            return $isUtf8 ? (int) \mb_strlen($text) : (int) self::strlen($text, $encoding);
        };

        // position of $needle in $str, searched from $offset
        $findFrom = static function ($needle, $offset) use ($str, $isUtf8, $encoding) {
            return $isUtf8
                ? \mb_strpos($str, $needle, $offset)
                : self::strpos($str, $needle, $offset, $encoding);
        };

        // min() of the next space and the next period from $offset;
        // this is 0 as soon as one of the two is not found
        $nextBoundary = static function ($offset) use ($findFrom): int {
            return (int) \min($findFrom(' ', $offset), $findFrom('.', $offset));
        };

        // part of $str
        $slice = static function ($start, $count) use ($str, $isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($str, $start, $count)
                : self::substr($str, $start, $count, $encoding);
        };

        // default length: half of the text
        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // no search-string: cut the text at the first boundary after $length
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $end = \min($length - 1, $charCount($str));
            }

            $pos = $nextBoundary($end);
            if (!$pos) {
                return $str;
            }

            $strSub = $slice(0, $pos);
            if ($strSub === false) {
                return '';
            }

            return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // locate the search-string and the start of the window around it
        if ($isUtf8) {
            $wordPos = (int) \mb_stripos($str, $search);
            $searchLength = (int) \mb_strlen($search);
        } else {
            $wordPos = (int) self::stripos($str, $search, 0, $encoding);
            $searchLength = (int) self::strlen($search, $encoding);
        }
        $halfSide = (int) ($wordPos - $length / 2 + $searchLength / 2);

        // move the window start back to the last boundary in front of it
        $startPos = 0;
        if ($halfSide > 0) {
            $halfText = $slice(0, $halfSide);
            if ($halfText !== false) {
                if ($isUtf8) {
                    $startPos = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $startPos = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        // the extract is only centered if the search-string was found
        // behind the first position and the window does not start at 0
        $isCentered = $wordPos && $halfSide > 0;

        // find the boundary that ends the extract (offset clamped to the text length)
        $offset = $isCentered ? $startPos + $length - 1 : $length - 1;
        $totalLength = (int) self::strlen($str, $encoding);
        if ($offset > $totalLength) {
            $offset = $totalLength;
        }
        $boundary = $nextBoundary($offset);

        if ($isCentered) {
            $extractLength = $boundary - $startPos;

            if ($extractLength <= 0) {
                // no usable end: take everything from the window start
                $strSub = $slice($startPos, $charCount($str));

                return $strSub !== false
                    ? $replacerForSkippedText . \ltrim($strSub, $trimChars)
                    : '';
            }

            // text was skipped on both sides
            $strSub = $slice($startPos, $extractLength);

            return $strSub !== false
                ? $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText
                : '';
        }

        // not centered: the extract starts at the beginning of the text
        if (!$boundary) {
            return $str;
        }

        $strSub = $slice(0, $boundary);

        return $strSub !== false
            ? \rtrim($strSub, $trimChars) . $replacerForSkippedText
            : '';
    }
1469
1470
    /**
1471
     * Reads entire file into a string.
1472
     *
1473
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1474
     *
1475
     * @see http://php.net/manual/en/function.file-get-contents.php
1476
     *
1477
     * @param string        $filename         <p>
1478
     *                                        Name of the file to read.
1479
     *                                        </p>
1480
     * @param bool          $use_include_path [optional] <p>
1481
     *                                        Prior to PHP 5, this parameter is called
1482
     *                                        use_include_path and is a bool.
1483
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1484
     *                                        to trigger include path
1485
     *                                        search.
1486
     *                                        </p>
1487
     * @param resource|null $context          [optional] <p>
1488
     *                                        A valid context resource created with
1489
     *                                        stream_context_create. If you don't need to use a
1490
     *                                        custom context, you can skip this parameter by &null;.
1491
     *                                        </p>
1492
     * @param int|null      $offset           [optional] <p>
1493
     *                                        The offset where the reading starts.
1494
     *                                        </p>
1495
     * @param int|null      $maxLength        [optional] <p>
1496
     *                                        Maximum length of data read. The default is to read until end
1497
     *                                        of file is reached.
1498
     *                                        </p>
1499
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1500
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1501
     *                                        some files, because they used non default utf-8 chars. Binary files
1502
     *                                        like images or pdf will not be converted.</p>
1503
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1504
     *                                        A empty string will trigger the autodetect anyway.</p>
1505
     *
1506
     * @return false|string the function returns the read data as string or <b>false</b> on failure
1507
     */
1508 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // sanitize the filename, give up if the filter fails
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // without a custom context, create one that carries the http timeout
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // start at the beginning of the file by default
        $offset = $offset ?? 0;

        // only pass the max length on if one was really given
        $data = \is_int($maxLength) === true
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // convert only non-binary data, or data detected as UTF-16 / UTF-32
        $isConvertible = self::is_binary($data, true) !== true
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($isConvertible) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1564
1565
    /**
1566
     * Checks if a file starts with BOM (Byte Order Mark) character.
1567
     *
1568
     * @param string $file_path <p>Path to a valid file.</p>
1569
     *
1570
     * @throws \RuntimeException if file_get_contents() returned false
1571
     *
1572
     * @return bool
1573
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
1574
     */
1575 2
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        // the file was readable: let the string check decide
        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1584
1585
    /**
1586
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1587
     *
1588
     * @param mixed  $var
1589
     * @param int    $normalization_form
1590
     * @param string $leading_combining
1591
     *
1592
     * @return mixed
1593
     */
1594 62
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        // arrays and objects: filter every element / property recursively
        if ($type === 'array' || $type === 'object') {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        // everything that is not a string is returned untouched
        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // already normalized: any non-empty marker keeps the check below alive
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // use the normalized string if there is one, otherwise
            // fall back to a conversion into UTF-8
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        // Prevent leading combining chars
        // for NFC-safe concatenations.
        $startsWithCombiningChar = $var[0] >= "\x80"
            && isset($normalized[0], $leading_combining[0])
            && \preg_match('/^\\p{Mn}/u', $var);

        if ($startsWithCombiningChar) {
            $var = $leading_combining . $var;
        }

        return $var;
    }
1651
1652
    /**
1653
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1654
     *
1655
     * Gets a specific external variable by name and optionally filters it
1656
     *
1657
     * @see http://php.net/manual/en/function.filter-input.php
1658
     *
1659
     * @param int    $type          <p>
1660
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1661
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1662
     *                              <b>INPUT_ENV</b>.
1663
     *                              </p>
1664
     * @param string $variable_name <p>
1665
     *                              Name of a variable to get.
1666
     *                              </p>
1667
     * @param int    $filter        [optional] <p>
1668
     *                              The ID of the filter to apply. The
1669
     *                              manual page lists the available filters.
1670
     *                              </p>
1671
     * @param mixed  $options       [optional] <p>
1672
     *                              Associative array of options or bitwise disjunction of flags. If filter
1673
     *                              accepts options, flags can be provided in "flags" field of array.
1674
     *                              </p>
1675
     *
1676
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1677
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1678
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1679
     */
1680
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Hand $options to the native function only when the caller really
        // supplied a fourth argument; otherwise call it with three arguments.
        $rawValue = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        // The result of the native call is passed through self::filter().
        return self::filter($rawValue);
    }
1694
1695
    /**
1696
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1697
     *
1698
     * Gets external variables and optionally filters them
1699
     *
1700
     * @see http://php.net/manual/en/function.filter-input-array.php
1701
     *
1702
     * @param int   $type       <p>
1703
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1704
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1705
     *                          <b>INPUT_ENV</b>.
1706
     *                          </p>
1707
     * @param mixed $definition [optional] <p>
1708
     *                          An array defining the arguments. A valid key is a string
1709
     *                          containing a variable name and a valid value is either a filter type, or an array
1710
     *                          optionally specifying the filter, flags and options. If the value is an
1711
     *                          array, valid keys are filter which specifies the
1712
     *                          filter type,
1713
     *                          flags which specifies any flags that apply to the
1714
     *                          filter, and options which specifies any options that
1715
     *                          apply to the filter. See the example below for a better understanding.
1716
     *                          </p>
1717
     *                          <p>
1718
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1719
     *                          input array are filtered by this filter.
1720
     *                          </p>
1721
     * @param bool  $add_empty  [optional] <p>
1722
     *                          Add missing keys as <b>NULL</b> to the return value.
1723
     *                          </p>
1724
     *
1725
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1726
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1727
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1728
     *               is not set and <b>NULL</b> if the filter fails.
1729
     */
1730
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native function is called with $type only;
        // otherwise definition and add-empty flag are forwarded as given.
        $rawValues = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // The result of the native call is passed through self::filter().
        return self::filter($rawValues);
    }
1740
1741
    /**
1742
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1743
     *
1744
     * Filters a variable with a specified filter
1745
     *
1746
     * @see http://php.net/manual/en/function.filter-var.php
1747
     *
1748
     * @param mixed $variable <p>
1749
     *                        Value to filter.
1750
     *                        </p>
1751
     * @param int   $filter   [optional] <p>
1752
     *                        The ID of the filter to apply. The
1753
     *                        manual page lists the available filters.
1754
     *                        </p>
1755
     * @param mixed $options  [optional] <p>
1756
     *                        Associative array of options or bitwise disjunction of flags. If filter
1757
     *                        accepts options, flags can be provided in "flags" field of array. For
1758
     *                        the "callback" filter, callable type should be passed. The
1759
     *                        callback must accept one argument, the value to be filtered, and return
1760
     *                        the value after filtering/sanitizing it.
1761
     *                        </p>
1762
     *                        <p>
1763
     *                        <code>
1764
     *                        // for filters that accept options, use this format
1765
     *                        $options = array(
1766
     *                        'options' => array(
1767
     *                        'default' => 3, // value to return if the filter fails
1768
     *                        // other options here
1769
     *                        'min_range' => 0
1770
     *                        ),
1771
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1772
     *                        );
1773
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1774
     *                        // for filter that only accept flags, you can pass them directly
1775
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1776
     *                        // for filter that only accept flags, you can also pass as an array
1777
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1778
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1779
     *                        // callback validate filter
1780
     *                        function foo($value)
1781
     *                        {
1782
     *                        // Expected format: Surname, GivenNames
1783
     *                        if (strpos($value, ", ") === false) return false;
1784
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1785
     *                        $empty = (empty($surname) || empty($givennames));
1786
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1787
     *                        if ($empty || $notstrings) {
1788
     *                        return false;
1789
     *                        } else {
1790
     *                        return $value;
1791
     *                        }
1792
     *                        }
1793
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1794
     *                        </code>
1795
     *                        </p>
1796
     *
1797
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1798
     */
1799 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Forward $options only when the caller passed a third argument.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        // The result of the native call is passed through self::filter().
        return self::filter($filtered);
    }
1809
1810
    /**
1811
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1812
     *
1813
     * Gets multiple variables and optionally filters them
1814
     *
1815
     * @see http://php.net/manual/en/function.filter-var-array.php
1816
     *
1817
     * @param array $data       <p>
1818
     *                          An array with string keys containing the data to filter.
1819
     *                          </p>
1820
     * @param mixed $definition [optional] <p>
1821
     *                          An array defining the arguments. A valid key is a string
1822
     *                          containing a variable name and a valid value is either a
1823
     *                          filter type, or an
1824
     *                          array optionally specifying the filter, flags and options.
1825
     *                          If the value is an array, valid keys are filter
1826
     *                          which specifies the filter type,
1827
     *                          flags which specifies any flags that apply to the
1828
     *                          filter, and options which specifies any options that
1829
     *                          apply to the filter. See the example below for a better understanding.
1830
     *                          </p>
1831
     *                          <p>
1832
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1833
     *                          input array are filtered by this filter.
1834
     *                          </p>
1835
     * @param bool  $add_empty  [optional] <p>
1836
     *                          Add missing keys as <b>NULL</b> to the return value.
1837
     *                          </p>
1838
     *
1839
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1840
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1841
     *               set
1842
     */
1843 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native function is called with $data only;
        // otherwise definition and add-empty flag are forwarded as given.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        // The result of the native call is passed through self::filter().
        return self::filter($filtered);
    }
1853
1854
    /**
1855
     * Checks whether finfo is available on the server.
1856
     *
1857
     * @return bool
1858
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1859
     */
1860
    public static function finfo_loaded(): bool
    {
        // The check is purely class based: true as soon as a class
        // named "finfo" is known to PHP.
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1864
1865
    /**
1866
     * Returns the first $n characters of the string.
1867
     *
1868
     * @param string $str      <p>The input string.</p>
1869
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1870
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1871
     *
1872
     * @return string
1873
     */
1874 13
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // Nothing to extract for a non-positive length or an empty input.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // Every encoding other than the default goes through the
        // encoding-aware substr() helper of this class.
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, 0, $n, $encoding);
        }

        return (string) \mb_substr($str, 0, $n);
    }
1886
1887
    /**
1888
     * Check if the number of unicode characters is not more than the specified integer.
1889
     *
1890
     * @param string $str      the original string to be checked
1891
     * @param int    $box_size the size in number of chars to be checked against string
1892
     *
1893
     * @return bool true if string is less than or equal to $box_size, false otherwise
1894
     */
1895 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by the strlen() helper of this class.
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1899
1900
    /**
1901
     * Try to fix simple broken UTF-8 strings.
1902
     *
1903
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1904
     *
1905
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1906
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1907
     * See: http://en.wikipedia.org/wiki/Windows-1252
1908
     *
1909
     * @param string $str <p>The input string</p>
1910
     *
1911
     * @return string
1912
     */
1913 46
    public static function fix_simple_utf8(string $str): string
    {
        // Search/replace lists derived from the "utf8_fix" table, built once
        // and then reused by every later call.
        static $searchCache = null;
        static $replaceCache = null;

        if ($str === '') {
            return '';
        }

        if ($searchCache === null) {
            // Load the mapping table lazily and remember it on the class.
            $fixTable = self::$BROKEN_UTF8_FIX;
            if ($fixTable === null) {
                $fixTable = self::getData('utf8_fix');
                self::$BROKEN_UTF8_FIX = $fixTable;
            }

            $searchCache = \array_keys($fixTable);
            $replaceCache = \array_values($fixTable);
        }

        return \str_replace($searchCache, $replaceCache, $str);
    }
1933
1934
    /**
1935
     * Fix a double (or multiple) encoded UTF8 string.
1936
     *
1937
     * @param string|string[] $str you can use a string or an array of strings
1938
     *
1939
     * @return string|string[]
1940
     *                         Will return the fixed input-"array" or
1941
     *                         the fixed input-"string"
1942
     *
1943
     * @psalm-suppress InvalidReturnType
1944
     */
1945 2
    public static function fix_utf8($str)
    {
        // Arrays are fixed element by element (recursively), keys are kept.
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        if ($current === '') {
            return $current;
        }

        // Decode and re-encode until a pass no longer changes the string.
        do {
            $previous = $current;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($previous, true)
            );
        } while ($previous !== $current);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1976
1977
    /**
     * Get the text direction of a specific character.
     *
     * If "IntlChar" support is flagged as available, its direction class is
     * used first; whenever that class is neither a known left-to-right nor a
     * known right-to-left class, the built-in code point table decides.
     *
     * @param string $char <p>The character to inspect.</p>
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction classes from the "IntlChar"-Class
            if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $c = self::chr_to_decimal($char);

        // Everything outside of this window is treated as left-to-right.
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // Single code points that are right-to-left.
        $rtlCodePoints = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtlCodePoints, true)) {
            return 'RTL';
        }

        // Inclusive [first, last] code point ranges that are right-to-left.
        $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2090
2091
    /**
2092
     * Check for php-support.
2093
     *
2094
     * @param string|null $key
2095
     *
2096
     * @return mixed
2097
     *               Return the full support-"array", if $key === null<br>
2098
     *               return bool-value, if $key is used and available<br>
2099
     *               otherwise return <strong>null</strong>
2100
     */
2101 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Load the transliterator list on first use and remember it.
            $transliteratorList = self::$INTL_TRANSLITERATOR_LIST;
            if ($transliteratorList === null) {
                $transliteratorList = self::getData('transliterator_list');
                self::$INTL_TRANSLITERATOR_LIST = $transliteratorList;
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = $transliteratorList;

            // Unknown (or null-valued) keys yield null.
            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        // Without a key the whole support array is returned as it is.
        return self::$SUPPORT;
    }
2115
2116
    /**
     * Detect the file type of (binary) data by looking at its first two bytes.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg tiff rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The data to inspect, only the first two bytes are read.</p>
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type';
     *               the $fallback is returned if the data is shorter than two bytes
     *               or if the signature is unknown
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // A signature needs two bytes; this also covers the empty string.
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // Raw two-byte signature => [extension, mime type].
        //
        // The raw bytes are compared on purpose: a numeric code built by
        // concatenating the decimal value of both bytes is ambiguous, e.g.
        // the bytes 7 and 173 would yield "7173", just like 71 and 73 ("GI").
        $signatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'II'       => ['tiff', 'image/tiff'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        $signature = \substr($str, 0, 2);
        if (!isset($signatures[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $signatures[$signature][0],
            'mime' => $signatures[$signature][1],
            'type' => 'binary',
        ];
    }
2224
2225
    /**
2226
     * @param int    $length        <p>Length of the random string.</p>
2227
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2228
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2229
     *
2230
     * @return string
2231
     */
2232 1
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // The plain "mb_*" functions are used for the default encoding, the
        // encoding-aware helpers of this class for everything else.
        $useNativeMb = $encoding === 'UTF-8';
        if (!$useNativeMb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $poolSize = $useNativeMb
            ? (int) \mb_strlen($possibleChars)
            : (int) self::strlen($possibleChars, $encoding);

        // Without any candidate character there is nothing to pick from.
        if ($poolSize === 0) {
            return '';
        }

        $randomString = '';
        $picked = 0;

        while ($picked < $length) {
            try {
                $position = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $poolSize - 1);
            }

            $char = $useNativeMb
                ? \mb_substr($possibleChars, $position, 1)
                : self::substr($possibleChars, $position, 1, $encoding);

            // Only a successfully extracted character counts as picked.
            if ($char !== false) {
                $randomString .= $char;
                ++$picked;
            }
        }

        return $randomString;
    }
2286
2287
    /**
2288
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2289
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2290
     *
2291
     * @return string
2292
     */
2293 1
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // Seed: random number, session id, remote / server address (if set)
        // and the caller supplied extra entropy, joined without a separator.
        $seed = \implode(
            '',
            [
                \random_int(0, \mt_getrandmax()),
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $entropyExtra,
            ]
        );

        $unique = \uniqid($seed, true);

        // Optionally condense the identifier into a md5-hash.
        return $md5 ? \md5($unique . $seed) : $unique;
    }
2309
2310
    /**
2311
     * alias for "UTF8::string_has_bom()"
2312
     *
2313
     * @param string $str
2314
     *
2315
     * @return bool
2316
     *
2317
     * @see UTF8::string_has_bom()
2318
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
2319
     */
2320 2
    public static function hasBom(string $str): bool
    {
        // Deprecated alias, the work is done by string_has_bom().
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2324
2325
    /**
2326
     * Returns true if the string contains a lower case char, false otherwise.
2327
     *
2328
     * @param string $str <p>The input string.</p>
2329
     *
2330
     * @return bool whether or not the string contains a lower case character
2331
     */
2332 47
    public static function has_lowercase(string $str): bool
    {
        // Without mbstring support the pattern helper of this class is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:lower:]]');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:lower:]]', $str);
    }
2341
2342
    /**
2343
     * Returns true if the string contains an upper case char, false otherwise.
2344
     *
2345
     * @param string $str <p>The input string.</p>
2346
     *
2347
     * @return bool whether or not the string contains an upper case character
2348
     */
2349 12
    public static function has_uppercase(string $str): bool
    {
        // Without mbstring support the pattern helper of this class is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:upper:]]');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:upper:]]', $str);
    }
2358
2359
    /**
2360
     * Converts a hexadecimal-value into an UTF-8 character.
2361
     *
2362
     * @param string $hexdec <p>The hexadecimal value.</p>
2363
     *
2364
     * @return false|string one single UTF-8 character
2365
     */
2366 4
    public static function hex_to_chr(string $hexdec)
    {
        // Hexadecimal string => number => character.
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2370
2371
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * Accepted are 4 to 6 hexadecimal digits, optionally prefixed
     * with "\u" or "U+", e.g. "00e4", "\u00e4" or "U+1F600".
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hexadecimal digits are allowed: with a wider character
        // class a value like "zzzz" would be converted to 0 (and "12g4" to
        // 0x12) by intval() instead of being reported as a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2395
2396
    /**
2397
     * alias for "UTF8::html_entity_decode()"
2398
     *
2399
     * @param string $str
2400
     * @param int    $flags
2401
     * @param string $encoding
2402
     *
2403
     * @return string
2404
     *
2405
     * @see UTF8::html_entity_decode()
2406
     */
2407 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Pure alias: all arguments are handed over unchanged.
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2411
2412
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * With mbstring support only code points up to 0xfffff are converted,
     * starting at 0x80 if the ASCII chars are kept and at 0x00 otherwise.
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // One conversion block: [start, end, offset, mask].
            // The map must consist of complete blocks of four integers:
            // PHP 8 throws a ValueError for any other size, a trailing
            // fifth element has therefore been dropped.
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2468
2469
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so we are left with our own little solution here.
     *
     * Converts all HTML entities to their applicable characters. Decoding is
     * repeated until the string no longer changes, so nested / double-encoded
     * entities are resolved as well.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string   $str      <p>
     *                           The input string.
     *                           </p>
     * @param int|null $flags    [optional] <p>
     *                           A bitmask of one or more of the following flags, which specify how to handle
     *                           quotes and which document type to use. When omitted (null) this method uses
     *                           ENT_QUOTES | ENT_HTML5.
     *                           <ul>
     *                           <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                           <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                           <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                           <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                           <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                           <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                           <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                           </ul>
     *                           </p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        if (
            !isset($str[3]) // fewer than 4 bytes, examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // A convmap is a flat list of 4-tuples: [start, end, offset, mask].
        // It must hold a multiple of 4 elements: PHP >= 8.0 throws a ValueError
        // otherwise, while older versions silently ignored the surplus element.
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        do {
            // remember the input of this pass, the loop ends once a pass changes nothing
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    // no explicit encoding argument here, exactly as before
                    $str = \mb_decode_numericentity($str, $convmap);
                } else {
                    $str = \mb_decode_numericentity($str, $convmap, $encoding);
                }
            } else {
                // NOTE(review): this branch still calls mb_convert_encoding() although
                // mbstring was reported as missing - presumably a polyfill provides it; confirm.
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // quotes are left encoded here, html_entity_decode() below handles them according to $flags
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" to numeric entities written without one
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2625
2626
    /**
     * Escape a string for HTML output via "UTF8::htmlspecialchars()".
     *
     * Both single and double quotes are converted (ENT_QUOTES) and invalid
     * code unit sequences are substituted (ENT_SUBSTITUTE).
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2642
2643
    /**
     * Remove empty html-tags.
     *
     * e.g.: <tag></tag>
     *
     * An opening tag that is followed (optionally separated by whitespace only)
     * by a closing tag is removed. The name of the closing tag is not compared
     * with the name of the opening tag.
     *
     * @param string $str
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $emptyTagPattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        // preg_replace() yields null on a PCRE error, the cast turns that into ''
        return (string) $stripped;
    }
2660
2661
    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
     *
     * On top of the native htmlentities() call, backslashes are replaced by
     * "&#92;" and the result is passed through UTF8::html_encode() with ASCII
     * chars kept as they are.
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str           <p>
     *                              The input string.
     *                              </p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type. The default is
     *                              ENT_COMPAT | ENT_HTML401.
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
     *                              returning an empty string. Using this flag is discouraged as it may have
     *                              security implications.</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
     *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
     *                              to ensure the well-formedness of XML documents with embedded external
     *                              content.</li>
     *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                              </ul>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Like <b>htmlspecialchars</b>,
     *                              <b>htmlentities</b> takes an optional third argument
     *                              <i>encoding</i> which defines encoding used in
     *                              conversion.
     *                              Although this argument is technically optional, you are highly
     *                              encouraged to specify the correct value for your code.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities. The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The encoded string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
     */
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * PHP does not replace a backslash by its html entity, since the backslash
         * is mostly used to escape characters when inserting into a database.
         * That kind of escaping is the job of the database layer, so the backslash
         * is replaced by its html entity equivalent here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // finally turn every remaining non-ASCII char into a numbered entity
        return self::html_encode($encoded, true, $encoding);
    }
2790
2791
    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str           <p>
     *                              The string being converted.
     *                              </p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type. The default is
     *                              ENT_COMPAT | ENT_HTML401.
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
     *                              returning an empty string. Using this flag is discouraged as it may have
     *                              security implications.</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
     *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
     *                              to ensure the well-formedness of XML documents with embedded external
     *                              content.</li>
     *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                              </ul>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Defines encoding used in conversion.
     *                              </p>
     *                              <p>
     *                              For the purposes of this function, the encodings
     *                              ISO-8859-1, ISO-8859-15,
     *                              UTF-8, cp866,
     *                              cp1251, cp1252, and
     *                              KOI8-R are effectively equivalent, provided the
     *                              <i>string</i> itself is valid for the encoding, as
     *                              the characters affected by <b>htmlspecialchars</b> occupy
     *                              the same positions in all of these encodings.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities, the default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
     */
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2913
2914
    /**
     * Checks whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2924
2925
    /**
     * alias for "UTF8::decimal_to_chr()"
     *
     * @param mixed $int <p>Passed through unchanged to UTF8::decimal_to_chr().</p>
     *
     * @return string
     *
     * @see UTF8::decimal_to_chr()
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2938
2939
    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * The hexadecimal digits are lower-case and left-padded with zeros to at
     * least four digits.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>Prefix put in front of the hexadecimal digits.</p>
     *
     * @return string the code point
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // left-pad short values with zeros, longer values are kept as they are
        $paddedHex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $paddedHex;
    }
2957
2958
    /**
     * Checks whether the "IntlChar" class is available.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $isAvailable = \class_exists('IntlChar');

        return $isAvailable;
    }
2968
2969
    /**
     * Checks whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2979
2980
    /**
     * alias for "UTF8::is_ascii()"
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $result = self::is_ascii($str);

        return $result;
    }
2994
2995
    /**
     * alias for "UTF8::is_base64()"
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_base64()
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $result = self::is_base64($str);

        return $result;
    }
3009
3010
    /**
     * alias for "UTF8::is_binary()"
     *
     * @param mixed $str
     * @param bool  $strict
     *
     * @return bool
     *
     * @see UTF8::is_binary()
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $result = self::is_binary($str, $strict);

        return $result;
    }
3025
3026
    /**
     * alias for "UTF8::is_bom()"
     *
     * @param string $utf8_chr
     *
     * @return bool
     *
     * @see UTF8::is_bom()
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $result = self::is_bom($utf8_chr);

        return $result;
    }
3040
3041
    /**
     * alias for "UTF8::is_html()"
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_html()
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $result = self::is_html($str);

        return $result;
    }
3055
3056
    /**
     * alias for "UTF8::is_json()"
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_json()
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $result = self::is_json($str);

        return $result;
    }
3070
3071
    /**
     * alias for "UTF8::is_utf16()"
     *
     * @param mixed $str
     *
     * @return false|int
     *                   the unchanged result of UTF8::is_utf16(), documented as:<br>
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3088
3089
    /**
     * alias for "UTF8::is_utf32()"
     *
     * @param mixed $str
     *
     * @return false|int
     *                   the unchanged result of UTF8::is_utf32(), documented as:<br>
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3106
3107
    /**
     * alias for "UTF8::is_utf8()"
     *
     * @param string $str
     * @param bool   $strict
     *
     * @return bool
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3122
3123
    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
     *
     * The pattern also accepts the empty string.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        // without mbstring the check goes through the library's own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:alpha:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:alpha:]]*$', $str);
    }
3140
3141
    /**
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
     *
     * The pattern also accepts the empty string.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        // without mbstring the check goes through the library's own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:alnum:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:alnum:]]*$', $str);
    }
3158
3159
    /**
     * Checks if a string is 7 bit ASCII.
     *
     * Only the bytes 0x20-0x7E plus 0x09, 0x0A, 0x0D, 0x10 and 0x13 are accepted,
     * every other byte makes the check fail. The empty string is accepted.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        if ($str === '') {
            return true;
        }

        // NOTE(review): \x10 and \x13 look like decimal 10 / 13 (LF / CR) written as hex;
        // LF and CR are already covered by \x0A and \x0D - confirm whether they are intended.
        $hasDisallowedByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

        return !$hasDisallowedByte;
    }
3176
3177
    /**
3178
     * Returns true if the string is base64 encoded, false otherwise.
3179
     *
3180
     * @param mixed|string $str                <p>The input string.</p>
3181
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
3182
     *
3183
     * @return bool whether or not $str is base64 encoded
3184
     */
3185 16
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        // Anything that is not a string can never be base64 text.
        if (!\is_string($str)) {
            return false;
        }

        // The empty string is only accepted when the caller opted in.
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Re-encoding must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3202
3203
    /**
3204
     * Check if the input is binary... (looks like a hack).
3205
     *
3206
     * @param mixed $input
3207
     * @param bool  $strict
3208
     *
3209
     * @return bool
3210
     */
3211 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $bytes = (string) $input;

        // An empty string is reported as non-binary.
        if ($bytes === '') {
            return false;
        }

        // A string made up solely of the digits "0" and "1" is accepted as binary.
        if (\preg_match('~^[01]+$~', $bytes)) {
            return true;
        }

        // Ask the class-internal file-type detection whether it classifies the data as binary.
        $fileType = self::get_file_type($bytes);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // More than a quarter of NUL bytes is treated as binary.
        $byteCount = \strlen($bytes);
        $nullCount = \substr_count($bytes, "\x0", 0, $byteCount);
        if ($nullCount / $byteCount > 0.25) {
            return true;
        }

        // The fileinfo probe below only runs in strict mode.
        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($bytes);

        return $mimeEncoding === 'binary';
    }
3247
3248
    /**
3249
     * Check if the file is binary.
3250
     *
3251
     * @param string $file
3252
     *
3253
     * @return bool
3254
     */
3255 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // A file that cannot be opened is reported as non-binary.
        if (\is_resource($handle) === false) {
            return false;
        }

        // Only the first 512 bytes are inspected.
        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3272
3273
    /**
3274
     * Returns true if the string contains only whitespace chars, false otherwise.
3275
     *
3276
     * @param string $str
3277
     *
3278
     * @return bool
3279
     *              Whether or not $str contains only whitespace characters
3280
     */
3281 15
    public static function is_blank(string $str): bool
    {
        // Zero or more whitespace characters, anchored on both ends.
        $pattern = '^[[:space:]]*$';

        // Without mbstring, defer to the class-internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3290
3291
    /**
3292
     * Checks if the given string is equal to any "Byte Order Mark".
3293
     *
3294
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3295
     *
3296
     * @param string $str <p>The input string.</p>
3297
     *
3298
     * @return bool
3299
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
3300
     */
3301 2
    public static function is_bom($str): bool
    {
        // Only the keys of self::$BOM (the BOM byte strings) are compared; the mapped
        // values are not used. Iterating by value avoids binding a reference into the
        // shared static array, which the previous by-reference loop did without unset().
        /** @noinspection PhpUnusedLocalVariableInspection */
        foreach (self::$BOM as $bomString => $bomByteLength) {
            // Strict comparison: $str must be exactly one of the BOM keys.
            if ($str === $bomString) {
                return true;
            }
        }

        return false;
    }
3312
3313
    /**
3314
     * Determine whether the string is considered to be empty.
3315
     *
3316
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3317
     * empty() does not generate a warning if the variable does not exist.
3318
     *
3319
     * @param mixed $str
3320
     *
3321
     * @return bool whether or not $str is empty()
3322
     */
3323
    public static function is_empty($str): bool
    {
        // The parameter is always defined, so empty() reduces to a plain falsiness check.
        return !$str;
    }
3327
3328
    /**
3329
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3330
     *
3331
     * @param string $str
3332
     *
3333
     * @return bool
3334
     *              Whether or not $str contains only hexadecimal chars
3335
     */
3336 13
    public static function is_hexadecimal(string $str): bool
    {
        // Zero or more hexadecimal digits, anchored on both ends.
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring, defer to the class-internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3345
3346
    /**
3347
     * Check if the string contains any html-tags <lall>.
3348
     *
3349
     * @param string $str <p>The input string.</p>
3350
     *
3351
     * @return bool
3352
     */
3353 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Look for one opening, closing or self-closing tag, with optional attributes.
        $found = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $found);

        return $found !== [];
    }
3368
3369
    /**
3370
     * Try to check if "$str" is an json-string.
3371
     *
3372
     * @param string $str                              <p>The input string.</p>
3373
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
3374
     *
3375
     * @return bool
3376
     */
3377 42
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only accepted when the input itself spells "null" (any letter case).
        $decodedToNull = $decoded === null;
        if ($decodedToNull && \strtoupper($str) !== 'NULL') {
            return false;
        }

        // Optionally reject everything that did not decode to an array or an object.
        $isContainer = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isContainer) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3405
3406
    /**
3407
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * @param string $str
3408
     *
3409
     * @return bool
3410
     */
3411 8
    public static function is_lowercase(string $str): bool
    {
        // Zero or more lower case characters, anchored on both ends.
        $pattern = '^[[:lower:]]*$';

        // Without mbstring, defer to the class-internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3420
3421
    /**
3422
     * Returns true if the string is serialized, false otherwise.
3423
     *
3424
     * @param string $str
3425
     *
3426
     * @return bool whether or not $str is serialized
3427
     */
3428 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of false; unserialize() returns false for it,
        // which cannot be told apart from its failure value, so it is matched literally.
        if ($str === 'b:0;') {
            return true;
        }

        // This method is a probe for strings of unknown origin, so no user class may be
        // instantiated while testing: with 'allowed_classes' => false serialized objects
        // become __PHP_Incomplete_Class instances (still !== false) instead of live objects.
        // The silence operator hides the notice unserialize() emits for malformed input.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3440
3441
    /**
3442
     * Returns true if the string contains only upper case chars, false
3443
     * otherwise.
3444
     *
3445
     * @param string $str <p>The input string.</p>
3446
     *
3447
     * @return bool
3448
     *              Whether or not $str contains only upper case characters
3449
     */
3450 8
    public static function is_uppercase(string $str): bool
    {
        // Zero or more upper case characters, anchored on both ends.
        $pattern = '^[[:upper:]]*$';

        // Without mbstring, defer to the class-internal pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3459
3460
    /**
3461
     * Check if the string is UTF-16.
3462
     *
3463
     * @param mixed $str                   <p>The input string.</p>
3464
     * @param bool  $checkIfStringIsBinary
3465
     *
3466
     * @return false|int
3467
     *                   <strong>false</strong> if it's not UTF-16,<br>
3468
     *                   <strong>1</strong> for UTF-16LE,<br>
3469
     *                   <strong>2</strong> for UTF-16BE
3470
     */
3471 22
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;

        // Optionally require that the strict binary check classifies the input as binary.
        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Result of self::count_chars($str, true, false); computed at most once, on demand.
        $strChars = [];

        // One score per byte order; both are computed with exactly the same steps.
        $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // Only a conversion that survives a full round trip is scored.
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Iterate by value: the call result is a temporary, so binding its elements by
            // reference (as before) had no target to modify. Only the keys are used here.
            /** @noinspection PhpUnusedLocalVariableInspection */
            foreach (self::count_chars($test3) as $test3char => $test3charCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score[$encoding];
                }
            }
        }

        // Equal scores (including both zero) mean no decision can be made.
        if ($score['UTF-16BE'] === $score['UTF-16LE']) {
            return false;
        }

        // 1 = UTF-16LE, 2 = UTF-16BE
        return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
    }
3537
3538
    /**
3539
     * Check if the string is UTF-32.
3540
     *
3541
     * @param mixed $str                   <p>The input string.</p>
3542
     * @param bool  $checkIfStringIsBinary
3543
     *
3544
     * @return false|int
3545
     *                   <strong>false</strong> if it's not UTF-32,<br>
3546
     *                   <strong>1</strong> for UTF-32LE,<br>
3547
     *                   <strong>2</strong> for UTF-32BE
3548
     */
3549 20
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;

        // Optionally require that the strict binary check classifies the input as binary.
        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Result of self::count_chars($str, true, false); computed at most once, on demand.
        $strChars = [];

        // One score per byte order; both are computed with exactly the same steps.
        $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // Only a conversion that survives a full round trip is scored.
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Iterate by value: the call result is a temporary, so binding its elements by
            // reference (as before) had no target to modify. Only the keys are used here.
            /** @noinspection PhpUnusedLocalVariableInspection */
            foreach (self::count_chars($test3) as $test3char => $test3charCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score[$encoding];
                }
            }
        }

        // Equal scores (including both zero) mean no decision can be made.
        if ($score['UTF-32BE'] === $score['UTF-32LE']) {
            return false;
        }

        // 1 = UTF-32LE, 2 = UTF-32BE
        return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
    }
3615
3616
    /**
3617
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3618
     *
3619
     * @see http://hsivonen.iki.fi/php-utf8/
3620
     *
3621
     * @param string $str    <p>The string to be checked.</p>
3622
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3623
     *
3624
     * @return bool
3625
     */
3626 108
    private static function is_utf8_string(string $str, bool $strict = false): bool
    {
        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            // In strict mode, input that is classified as binary and detected as
            // UTF-16 or UTF-32 is rejected up front.
            $isBinary = self::is_binary($str, true);

            if (
                $isBinary
                &&
                (self::is_utf16($str, false) !== false || self::is_utf32($str, false) !== false)
            ) {
                return false;
            }
        }

        // NOTE(review): this shortcut runs only when pcre_utf8_support() is NOT true, yet it
        // relies on the /u modifier - the condition looks inverted; confirm against
        // pcre_utf8_support() before changing it.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $pending = 0; // continuation octets still expected for the current sequence
        $codePoint = 0; // code point assembled so far
        $seqLength = 1; // total number of octets announced by the current lead octet

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $byteCount = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byteCount; ++$pos) {
            // presumably maps a single byte to its ordinal value - confirm against getData('ord')
            $octet = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the lead octet of a sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $seqLength = 1;
                } elseif ((0xE0 & $octet) === 0xC0) {
                    // Lead octet of a 2 octet sequence.
                    $codePoint = ($octet & 0x1F) << 6;
                    $pending = 1;
                    $seqLength = 2;
                } elseif ((0xF0 & $octet) === 0xE0) {
                    // Lead octet of a 3 octet sequence.
                    $codePoint = ($octet & 0x0F) << 12;
                    $pending = 2;
                    $seqLength = 3;
                } elseif ((0xF8 & $octet) === 0xF0) {
                    // Lead octet of a 4 octet sequence.
                    $codePoint = ($octet & 0x07) << 18;
                    $pending = 3;
                    $seqLength = 4;
                } elseif ((0xFC & $octet) === 0xF8) {
                    // Lead octet of a 5 octet sequence: always illegal, but instead of
                    // resynchronizing the sequence is consumed and rejected by the
                    // "$seqLength > 4" check once it is complete.
                    $codePoint = ($octet & 0x03) << 24;
                    $pending = 4;
                    $seqLength = 5;
                } elseif ((0xFE & $octet) === 0xFC) {
                    // Lead octet of a 6 octet sequence, handled like the 5 octet case.
                    $codePoint = ($octet & 1) << 30;
                    $pending = 5;
                    $seqLength = 6;
                } else {
                    // Neither US-ASCII nor a legal lead octet.
                    return false;
                }

                continue;
            }

            // Inside a sequence only a continuation octet (10xxxxxx) is legal.
            if ((0xC0 & $octet) !== 0x80) {
                return false;
            }

            // Merge the 6 payload bits at the position given by the octets still pending.
            $codePoint |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending !== 0) {
                continue;
            }

            // Sequence complete: check for illegal sequences and code points.
            if (
                // From Unicode 3.1, non-shortest form is illegal.
                ($seqLength === 2 && $codePoint < 0x0080)
                ||
                ($seqLength === 3 && $codePoint < 0x0800)
                ||
                ($seqLength === 4 && $codePoint < 0x10000)
                ||
                ($seqLength > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($codePoint & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($codePoint > 0x10FFFF)
            ) {
                return false;
            }

            // Reset the decoder state for the next character.
            $codePoint = 0;
            $seqLength = 1;
        }

        // A string that ends while continuation octets are still expected is truncated
        // and therefore invalid; previously this case was reported as valid.
        return $pending === 0;
    }
3761
3762
    /**
3763
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
3764
     *
3765
     * @param null|int|string|string[] $str <p>The input to be checked.</p>
3766
     * @param bool                     $strict  <p>Check also if the string is not UTF-16 or UTF-32.</p>
3767
     *
3768
     * @return bool
3769
     */
3770 82
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Every element must pass; nested arrays are handled by the recursive call.
            // Elements are only read, so they are iterated by value - the previous
            // by-reference loop left its reference bound without unset().
            foreach ($str as $item) {
                if (self::is_utf8($item, $strict) === false) {
                    return false;
                }
            }

            // Also reached for an empty array.
            return true;
        }

        // Non-array input is cast to string before validation.
        return self::is_utf8_string((string) $str, $strict);
    }
3784
3785
    /**
3786
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3787
     * Decodes a JSON string
3788
     *
3789
     * @see http://php.net/manual/en/function.json-decode.php
3790
     *
3791
     * @param string $json    <p>
3792
     *                        The <i>json</i> string being decoded.
3793
     *                        </p>
3794
     *                        <p>
3795
     *                        This function only works with UTF-8 encoded strings.
3796
     *                        </p>
3797
     *                        <p>PHP implements a superset of
3798
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3799
     *                        only supports these values when they are nested inside an array or an object.
3800
     *                        </p>
3801
     * @param bool   $assoc   [optional] <p>
3802
     *                        When <b>TRUE</b>, returned objects will be converted into
3803
     *                        associative arrays.
3804
     *                        </p>
3805
     * @param int    $depth   [optional] <p>
3806
     *                        User specified recursion depth.
3807
     *                        </p>
3808
     * @param int    $options [optional] <p>
3809
     *                        Bitmask of JSON decode options. Currently only
3810
     *                        <b>JSON_BIGINT_AS_STRING</b>
3811
     *                        is supported (default is to cast large integers as floats)
3812
     *                        </p>
3813
     *
3814
     * @return mixed
3815
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
3816
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
3817
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
3818
     *               is deeper than the recursion limit.
3819
     */
3820 43
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The input is passed through the class-internal filter before decoding.
        $filtered = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3835
3836
    /**
3837
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3838
     * Returns the JSON representation of a value.
3839
     *
3840
     * @see http://php.net/manual/en/function.json-encode.php
3841
     *
3842
     * @param mixed $value   <p>
3843
     *                       The <i>value</i> being encoded. Can be any type except
3844
     *                       a resource.
3845
     *                       </p>
3846
     *                       <p>
3847
     *                       All string data must be UTF-8 encoded.
3848
     *                       </p>
3849
     *                       <p>PHP implements a superset of
3850
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3851
     *                       only supports these values when they are nested inside an array or an object.
3852
     *                       </p>
3853
     * @param int   $options [optional] <p>
3854
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3855
     *                       <b>JSON_HEX_TAG</b>,
3856
     *                       <b>JSON_HEX_AMP</b>,
3857
     *                       <b>JSON_HEX_APOS</b>,
3858
     *                       <b>JSON_NUMERIC_CHECK</b>,
3859
     *                       <b>JSON_PRETTY_PRINT</b>,
3860
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
3861
     *                       <b>JSON_FORCE_OBJECT</b>,
3862
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3863
     *                       constants is described on
3864
     *                       the JSON constants page.
3865
     *                       </p>
3866
     * @param int   $depth   [optional] <p>
3867
     *                       Set the maximum depth. Must be greater than zero.
3868
     *                       </p>
3869
     *
3870
     * @return false|string
3871
     *                      A JSON encoded <strong>string</strong> on success or<br>
3872
     *                      <strong>FALSE</strong> on failure
3873
     */
3874 5
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value is passed through the class-internal filter before encoding.
        $filtered = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3885
3886
    /**
3887
     * Checks whether JSON is available on the server.
3888
     *
3889
     * @return bool
3890
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
3891
     */
3892
    public static function json_loaded(): bool
    {
        // JSON counts as available when the json_decode() function exists.
        $decoderExists = \function_exists('json_decode');

        return $decoderExists;
    }
3896
3897
    /**
     * Lowercases the first character of a string and leaves the rest untouched.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Any non-UTF-8 charset goes through the encoding-aware helpers of this class.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        // The plain mbstring call is only used when neither a language nor
        // the "keep string length" option was requested.
        $plainMbIsEnough = $lang === null && $tryToKeepStringLength === false;
        if ($plainMbIsEnough === true) {
            return \mb_strtolower($head) . $tail;
        }

        return self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
    }
3953
3954
    /**
     * Alias for "UTF8::lcfirst()": forwards all arguments unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowered;
    }
3976
3977
    /**
     * Lowercase the first character of every word in the string.
     *
     * The string is split via "UTF8::str_to_words()", each fragment that is not
     * listed in $exceptions is passed to "UTF8::lcfirst()", and the fragments
     * are joined again without a separator.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
     *                                           start a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against the empty string explicitly: a loose "!$str" test
        // would also throw away the valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // Empty fragments (and the caseless fragment "0") need no work.
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }
        // Break the reference so later writes to $word cannot touch $words.
        unset($word);

        return \implode('', $words);
    }
4023
4024
    /**
     * Alias for "UTF8::lcfirst()": forwards all arguments unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowered;
    }
4046
4047
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * When $chars is null or an empty string, leading whitespace ("\s") is
     * removed; otherwise every leading character contained in $chars is removed.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Strict comparison on purpose: a loose truthiness test treats the
        // character list "0" as "not given" and would strip whitespace instead.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2.
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // No mbstring: fall back to the PCRE based helper of this class.
        return self::regex_replace($str, $pattern, '', '', '/');
    }
4075
4076
    /**
     * Returns the UTF-8 character with the highest code point in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null when no code points were found
     */
    public static function max($arg)
    {
        if (\is_array($arg) === true) {
            $arg = \implode('', $arg);
        }

        $codepoints = self::codepoints($arg, false);

        if (\count($codepoints) > 0) {
            $highest = \max($codepoints);

            return self::chr($highest);
        }

        return null;
    }
4098
4099
    /**
     * Determines the largest per-character byte count in the given string.
     *
     * The per-character sizes come from "UTF8::chr_size_list()".
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the largest byte length among the characters, or 0 when the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        if (\count($sizes) === 0) {
            return 0;
        }

        return (int) \max($sizes);
    }
4116
4117
    /**
     * Reports whether the "mbstring" extension is loaded in this PHP runtime.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4127
4128
    /**
     * Returns the UTF-8 character with the lowest code point in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code points were found
     */
    public static function min($arg)
    {
        if (\is_array($arg) === true) {
            $arg = \implode('', $arg);
        }

        $codepoints = self::codepoints($arg, false);

        if (\count($codepoints) > 0) {
            $lowest = \min($codepoints);

            return self::chr($lowest);
        }

        return null;
    }
4150
4151
    /**
     * Alias for "UTF8::normalize_encoding()": forwards both arguments unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4166
4167
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty / "0" / "1" input returns $fallback; a handful of
     * very common names are answered directly; then a per-request cache is
     * consulted; then the list of known encodings (loaded lazily via
     * "getData('encodings')"); finally the upper-cased name, stripped of all
     * non-alphanumeric characters, is looked up in an alias table. A name that
     * matches nothing is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // '', '0' and '1' are what callers without "strict_types" end up with
        // after passing false / 0 / true; all of them mean "no encoding given".
        if ($encoding === '' || $encoding === '0' || $encoding === '1') {
            return $fallback;
        }

        // Fast paths for the most common names (no cache or table lookup needed).
        if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
            return 'UTF-8';
        }

        if ($encoding === '8BIT' || $encoding === 'BINARY') {
            return 'CP850';
        }

        if ($encoding === 'HTML' || $encoding === 'HTML-ENTITIES') {
            return 'HTML-ENTITIES';
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // Already a known, correctly spelled encoding name.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encodingOrig = $encoding;
        $encoding = \strtoupper($encoding);
        // Lookup key for the alias table: upper-case letters and digits only.
        $encodingUpperHelper = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Every value in the table is a non-empty string, so isset() is sufficient.
        if (isset($equivalences[$encodingUpperHelper])) {
            $encoding = $equivalences[$encodingUpperHelper];
        }

        // Cache under the spelling the caller used, so the next call hits directly.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

        return $encoding;
    }
4310
4311
    /**
     * Convert Windows ("\r\n") and lone carriage-return ("\r") line endings to "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // strtr() tries the longest key first, so "\r\n" becomes a single "\n".
        $lineEndings = [
            "\r\n" => "\n",
            "\r"   => "\n",
        ];

        return \strtr($str, $lineEndings);
    }
4322
4323
    /**
     * Replace typographic quotes, dashes and the ellipsis (as produced by e.g.
     * MS Word) with their plain ASCII counterparts.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \strtr($str, $replacements);
    }
4374
4375
    /**
     * Replace every character listed in the class whitespace table with a plain
     * space and, unless told otherwise, remove the bidirectional control characters.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        // One search list per $keepNonBreakingSpace variant (index 0 / 1).
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // The bidi controls are removed, not replaced by a space.
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4416
4417
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-request cache, the static "ord" data table, "IntlChar"
     * (if supported) and finally a manual decode of the first (up to) four bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character;<br>
     *             the manual fallback yields 0 for an empty byte list and the value of the
     *             first byte when the expected continuation bytes are missing
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                return $CHAR_CACHE[$cacheKey] = $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // unpack('C*', ...) returns the bytes as a 1-based list of integers.
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode)
            $code = $lead;
        }

        return $CHAR_CACHE[$cacheKey] = $code;
    }
4502
4503
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()" this method never places variables in the current scope;
     *          the result is always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Without mbstring the native parser is used; success then only
        // depends on whether anything was written to $result.
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4535
4536
    /**
     * Checks if the "u" modifier, which enables Unicode support in PCRE, is available.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // preg_match() yields 1 on a match, 0 on no match and false on error;
        // only a successful match counts as "supported".
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matched = @\preg_match('//u', '');

        return $matched === 1;
    }
4548
4549
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Both boundaries are converted to code points first: as decimal numbers if
     * both consist of digits only, as hexadecimal numbers if both consist of hex
     * digits only (ctype mode), via "is_numeric" (non-ctype mode), or otherwise
     * via "UTF8::ord()". An empty array is returned when a boundary is falsy or
     * resolves to the code point 0.
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is missing
     *
     * @return string[]
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            // $step is untyped, so callers may pass anything here.
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions must receive strings: an integer argument is
        // interpreted as an ASCII code (and is deprecated since PHP 8.1), which
        // made the hexadecimal detection depend on the argument's PHP type.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // NOTE(review): ord() is called with its default encoding here,
            // while chr() below receives $encoding - confirm this is intended.
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4631
4632
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Only strings containing one of these markers can hold entities,
        // percent-escapes or "\u" sequences; everything else takes the short path.
        $mayBeEncoded = \strpos($str, '&') !== false
                        ||
                        \strpos($str, '%') !== false
                        ||
                        \strpos($str, '+') !== false
                        ||
                        \strpos($str, '\u') !== false;

        if ($mayBeEncoded === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // Decode at least once; with $multi_decode keep going until a pass
        // no longer changes the string.
        do {
            $previous = $str;

            $asUtf8 = self::to_utf8($str);

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $withoutEntities = self::html_entity_decode($asUtf8, \ENT_QUOTES | \ENT_HTML5);

            $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4689
4690
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in $delimiter and always executed with the "u"
     * (UTF-8) modifier, followed by the modifiers given in $options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // "msr" is mapped to "ms" - presumably because "r" is an mb_ereg
        // option that PCRE does not know (TODO confirm).
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $wrapper = $delimiter ?: '/';

        $regex = $wrapper . $pattern . $wrapper . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4723
4724
    /**
     * Deprecated camelCase alias for "UTF8::remove_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>Whatever "UTF8::remove_bom()" returns for $str.</p>
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4738
4739
    /**
     * Strips every byte-order-mark listed in self::$BOM from the beginning of the string.
     *
     * The BOM table is walked once, in order; each entry that matches the current
     * start of the string is cut off before the next entry is checked.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>The string without a leading BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        foreach (self::$BOM as $bom => $bomLength) {
            // only a BOM at the very first byte counts
            if (\strpos($str, $bom, 0) !== 0) {
                continue;
            }

            // cut off the BOM bytes and keep everything behind them
            $rest = \substr($str, $bomLength);
            if ($rest === false) {
                // "substr()" may return false if nothing is left behind the BOM
                return '';
            }

            $str = (string) $rest;
        }

        return $str;
    }
4768
4769
    /**
     * Collapses consecutive repetitions of a string inside another string into a single occurrence.
     *
     * e.g.: remove_duplicates('a  b   c') === 'a b c'
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String(s) whose consecutive repetitions should be collapsed.</p>
     *
     * @return string <p>The result string with removed duplicates; an empty string if the regex engine
     *                reports an error (e.g. for invalid UTF-8 input). If $what is neither a string nor
     *                an array, $str is returned unchanged.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // anything that is neither a string nor an array is ignored
        if (\is_array($what) !== true) {
            return $str;
        }

        foreach ($what as $item) {
            // A callback is used so that $item is inserted literally: as a plain "preg_replace()"
            // replacement, text like "\0" or "$1" would be interpreted as a back-reference.
            $str = (string) \preg_replace_callback(
                '/(?:' . \preg_quote($item, '/') . ')+/u',
                static function (array $match) use ($item): string {
                    return $item;
                },
                $str
            );
        }

        return $str;
    }
4792
4793
    /**
     * Strips HTML (and PHP) tags from the string via "strip_tags()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. '&lt;b&gt;&lt;i&gt;'.
     *                              Default: '' (strip everything)</p>
     *
     * @return string <p>The string without the stripped tags.</p>
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $stripped = \strip_tags($str, $allowableTags);

        return $stripped;
    }
4807
4808
    /**
     * Replaces all line breaks [<br> | <br /> | \r\n | \r | \n] in the string.
     *
     * A Windows line break ("\r\n") counts as one break, so it is replaced only once.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is an empty string.</p>
     *
     * @return string <p>The string with all breaks replaced; an empty string if "preg_replace()" fails.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - "\r\n" comes first in the alternation so that CRLF is matched as a single break
        // - "<br\b[^>]*>" matches "<br>", "<br/>", "<br class='x' />" ... but not e.g. "<brown>"
        return (string) \preg_replace("#\r\n|\r|\n|<br\\b[^>]*>#i", $replacement, $str);
    }
4820
4821
    /**
     * Removes (or replaces) invisible control characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * The replacement is repeated until nothing matches anymore, so sequences which only
     * appear after a previous removal (e.g. "%%0000") are caught as well.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also handle the url-encoded form (e.g. "%00") of the characters.</p>
     * @param string $replacement [optional] <p>The string used instead of each match. Default: ''</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        $patterns = $url_encoded
            ? [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/',   // url encoded 16-31
            ]
            : [];

        $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        $replaced = 0;
        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replaced);
        } while ($replaced !== 0);

        return $str;
    }
4854
4855
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string <p>The string without the prefix $substring; $str itself if it does not start with it.</p>
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // compare against '' explicitly: a truthiness check would treat the prefix "0" as empty
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4886
4887
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string <p>The string without the suffix $substring; $str itself if it does not end with it.</p>
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // compare against '' explicitly: a truthiness check would treat the suffix "0" as empty
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4919
4920
    /**
     * Replaces every occurrence of $search in $str with $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string <p>The string after the replacements.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // case-sensitive: native "str_replace()", otherwise the "UTF8::str_ireplace()" helper
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4942
4943
    /**
     * Replaces every occurrence of each element of $search in $str with $replacement.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string (or the list of strings) to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string <p>The string after the replacements.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        // case-sensitive: native "str_replace()", otherwise the "UTF8::str_ireplace()" helper
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4965
4966
    /**
     * Replaces the diamond question mark (U+FFFD, "\xEF\xBF\xBD") and, optionally,
     * invalid UTF-8 byte sequences with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character.</p>
     * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 chars via "mb_convert_encoding()".</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            // an empty replacement is passed to mbstring as the keyword "none"
            $substitute = $replacementChar === '' ? 'none' : $replacementChar;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // switch the substitute character only for this conversion and restore it afterwards
            $previousSubstitute = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previousSubstitute);
        }

        // both search entries are replaced by the same replacement string
        return \str_replace(["\xEF\xBF\xBD", '�'], $replacementChar, $str);
    }
5012
5013
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or '' strips whitespace.</p>
     *
     * @return string <p>The string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // compare explicitly: a truthiness check would ignore the character list "0"
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // "\z" anchors at the very end of the string, while "$" may also match before a final newline
            $pattern = "[{$chars}]+\\z";
        } else {
            $pattern = '[\\s]+\\z';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
5041
5042
    /**
     * WARNING: Prints the content of self::$SUPPORT (native UTF-8 support / libs) as HTML, e.g. for debugging.
     *
     * Output format: one "key - value" line per entry, wrapped into a "pre"-tag.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        $lines = '';
        foreach (self::$SUPPORT as $feature => $state) {
            $lines .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '<pre>' . $lines . '</pre>';
    }
5056
5057
    /**
     * Converts a single UTF-8 character into its HTML numbered entity, like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The HTML numbered entity, the unchanged ASCII char (if requested) or '' for empty input.</p>
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // ASCII input is returned untouched if the caller asked for it
        $keepUntouched = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($keepUntouched) {
            return $char;
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
5082
5083
    /**
     * Replaces every run of $tabLength spaces with a single tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength <p>Number of spaces which are collapsed into one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // the run of spaces which stands for one tab
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
5101
5102
    /**
     * Alias for "UTF8::str_split()": all arguments are passed through unchanged.
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     *
     * @see UTF8::str_split()
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
5120
5121
    /**
     * Alias for "UTF8::str_starts_with()": both arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $startsWith = self::str_starts_with($haystack, $needle);

        return $startsWith;
    }
5135
5136
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // 'UTF-8' and 'CP850' are used as given, every other encoding name is normalized first
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);

        // drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_strtoupper()" is only used if no language / length special cases are requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        /**
         * Shared uppercase step of both replacements below.
         *
         * @param string $text
         * @param bool   $clean <p>Passed on as "cleanUtf8"-flag to "UTF8::strtoupper()".</p>
         *
         * @return string
         */
        $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($text)
                : \mb_strtoupper($text, $encoding);
        };

        // 1.) remove every run of separators and uppercase the character right behind it (if there is one)
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return isset($match[1]) ? $toUpper($match[1], false) : '';
            },
            $str
        );

        // 2.) uppercase every run of digits together with the character right behind it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $str
        );
    }
5221
5222
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the helper is applied twice:
     * once with a space and once with a dash as word separator.
     *
     * @param string $str
     *
     * @return string <p>$str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($bySpace, '-');
    }
5240
5241
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // byte-wise search for the case-sensitive lookup, "mb_stripos()" for the insensitive one
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5263
5264
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty haystack, an empty list of needles or an empty needle always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool <p>Whether or not $haystack contains every one of the $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against '' explicitly: a truthiness check would reject the needle "0"
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail, but a hit must not end the loop:
            // the remaining needles still have to be checked
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5299
5300
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped; an empty haystack or an empty list of needles results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool <p>Whether or not $haystack contains at least one of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against '' explicitly: a truthiness check would skip the needle "0"
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5342
5343
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for "UTF8::str_delimit()" with '-' as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5357
5358
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the "mb_ereg_*" functions are used if "mbstring" is available, "preg_*" otherwise
        $hasMbString = self::$SUPPORT['mbstring'] === true;

        // 1.) trim + put a dash in front of every uppercase letter matched behind "\B"
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', \trim($str));
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', \trim($str));
        }

        // 2.) lowercase (before the delimiter is inserted, so the delimiter keeps its case)
        $usePlainMbFunction = $lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8';
        if ($usePlainMbFunction) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // 3.) replace every run of dashes, underscores and whitespace by the delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5409
5410
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary (UTF-32 / UTF-16), ASCII, UTF-8,
     * "mb_detect_encoding()" and finally an "iconv()" round-trip per known encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            $utf32Type = self::is_utf32($str, false);
            if ($utf32Type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32Type === 2) {
                return 'UTF-32BE';
            }

            $utf16Type = self::is_utf16($str, false);
            if ($utf16Type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16Type === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        // the list of encodings is loaded only once and kept in the static property
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // the string is converted from the candidate encoding into the same encoding:
            // the candidate is accepted if the string survives that round-trip unchanged
            //
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5531
5532
    /**
     * Alias for "UTF8::str_ends_with()": both arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $endsWith = self::str_ends_with($haystack, $needle);

        return $endsWith;
    }
5546
5547
    /**
     * Checks (byte-wise and case-sensitive) if the string ends with the given substring.
     *
     * An empty $needle always matches, even if $haystack is empty as well.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // the last "strlen($needle)" bytes of the haystack must be identical to the needle
        $tail = \substr($haystack, -\strlen($needle));

        return $tail === $needle;
    }
5567
5568
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty list of $substrings results in false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool <p>Whether or not $str ends with one of the $substrings.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            // compare the last "strlen($candidate)" bytes of the string with the candidate
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5592
5593
    /**
     * Make sure the string starts with $substring; prepend it when it is missing.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // byte-wise prefix comparison; an empty $substring is never treated as "present"
        $alreadyPrefixed = $substring !== ''
                           &&
                           \strncmp($str, $substring, \strlen($substring)) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5614
5615
    /**
     * Make sure the string ends with $substring; append it when it is missing.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // byte-wise suffix comparison, only meaningful when both strings are non-empty
        $alreadySuffixed = $str !== ''
                           &&
                           $substring !== ''
                           &&
                           \substr($str, -\strlen($substring)) === $substring;

        if ($alreadySuffixed) {
            return $str;
        }

        return $str . $substring;
    }
5637
5638
    /**
     * Removes every '_id', turns the remaining underscores into spaces, trims
     * the result and passes it through "UTF8::ucfirst()".
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' has to be removed before the single underscores are replaced
        $withoutId = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $withoutId);

        return self::ucfirst(\trim($spaced));
    }
5662
5663
    /**
     * Alias of "UTF8::str_istarts_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring expected at the start of the haystack.</p>
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $startsWithNeedle = self::str_istarts_with($haystack, $needle);

        return $startsWithNeedle;
    }
5677
5678
    /**
     * Alias of "UTF8::str_iends_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring expected at the end of the haystack.</p>
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $endsWithNeedle = self::str_iends_with($haystack, $needle);

        return $endsWithNeedle;
    }
5692
5693
    /**
     * Tell whether $haystack ends with $needle, ignoring case.
     *
     * An empty needle always matches; an empty haystack only matches an empty needle.
     * The tail of the haystack is cut by the byte length of the needle and then
     * compared via "UTF8::strcasecmp()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        return $needle === ''
               ||
               (
                   $haystack !== ''
                   &&
                   self::strcasecmp(\substr($haystack, -\strlen($needle)), $needle) === 0
               );
    }
5713
5714
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // Iterate by value: nothing is written back into the array, so a
        // by-reference loop would only force a copy and leave a dangling reference.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5738
5739
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper around "UTF8::stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::stripos()
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5765
5766
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper around "UTF8::strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strripos()
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5793
5794
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper around "UTF8::strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strpos()
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5820
5821
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper around "UTF8::strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strrpos()
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5848
5849
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged when $index is greater than its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // fast path: use the "mb_" functions directly for UTF-8
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
5888
5889
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search value is quoted and turned into a "/…/ui" pattern, then the
     * replacement is done via <function>preg_replace</function>. An empty search
     * value is mapped to a pattern that never matches.
     *
     * INFO: $replace is handed to "preg_replace()" as it is, so references like
     *       "$0" or "\1" inside of it are interpreted by "preg_replace()".
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s).
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = \array_map(
            static function ($needle): string {
                $needle = (string) $needle;

                return $needle === ''
                    ? '/^(?<=.)$/'
                    : '/' . \preg_quote($needle, '/') . '/ui';
            },
            (array) $search
        );

        // "preg_replace()" fills the reference parameter with the number of replacements
        $result = \preg_replace($patterns, $replace, $subject, -1, $count);

        return $result;
    }
5933
5934
    /**
     * Replaces $search from the beginning of string with $replacement (case-insensitive).
     *
     * Special cases:
     * - empty $str and empty $replacement: '' is returned
     * - empty $str and empty $search: $replacement is returned
     * - empty $search (and non-empty $str): $replacement is appended to $str
     *
     * The comparison is done with the multibyte functions, so non-ASCII
     * characters are matched case-insensitively as well.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // Only the leading characters can match, so compare just that slice
        // instead of scanning the whole string.
        $searchLength = (int) \mb_strlen($search, 'UTF-8');
        $prefix = (string) \mb_substr($str, 0, $searchLength, 'UTF-8');

        if (\mb_stripos($prefix, $search, 0, 'UTF-8') === 0) {
            return $replacement . (string) \mb_substr($str, $searchLength, null, 'UTF-8');
        }

        return $str;
    }
5965
5966
    /**
     * Replaces $search from the ending of string with $replacement (case-insensitive).
     *
     * Special cases:
     * - empty $str and empty $replacement: '' is returned
     * - empty $str and empty $search: $replacement is returned
     * - empty $search (and non-empty $str): $replacement is appended to $str
     * - $search longer than $str: $str is returned unchanged
     *
     * The comparison is done with the multibyte functions, so non-ASCII
     * characters are matched case-insensitively as well.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $strLength = (int) \mb_strlen($str, 'UTF-8');
        $searchLength = (int) \mb_strlen($search, 'UTF-8');

        // A longer needle can never match; bailing out here also avoids an
        // out-of-range search offset (which is a ValueError in PHP 8).
        if ($searchLength > $strLength) {
            return $str;
        }

        // Only the trailing characters can match, so compare just that slice.
        $suffix = (string) \mb_substr($str, -$searchLength, null, 'UTF-8');

        if (\mb_stripos($suffix, $search, 0, 'UTF-8') === 0) {
            return (string) \mb_substr($str, 0, $strLength - $searchLength, 'UTF-8') . $replacement;
        }

        return $str;
    }
5997
5998
    /**
     * Tell whether $haystack starts with $needle, ignoring case.
     *
     * An empty needle always matches; an empty haystack only matches an empty needle.
     * The position lookup is delegated to "UTF8::stripos()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        return $needle === ''
               ||
               (
                   $haystack !== ''
                   &&
                   self::stripos($haystack, $needle) === 0
               );
    }
6018
6019
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * An empty $str or an empty $substrings list always yields false.
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        if ($substrings === []) {
            return false;
        }

        // Iterate by value: nothing is written back into the array, so a
        // by-reference loop would only force a copy and leave a dangling reference.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6047
6048
    /**
     * Gets the substring after the first occurrence of a separator
     * (the separator is searched via "UTF8::str_iindex_first()").
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset must be measured in the same encoding that is used for
        // cutting the string below, so forward $encoding to the lookup.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6082
6083
    /**
     * Gets the substring after the last occurrence of a separator
     * (the separator is searched via "UTF8::strripos()").
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset must be measured in the same encoding that is used for
        // cutting the string below, so forward $encoding to the lookup.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6117
6118
    /**
     * Gets the substring before the first occurrence of a separator
     * (the separator is searched via "UTF8::str_iindex_first()").
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset must be measured in the same encoding that is used for
        // cutting the string below, so forward $encoding to the lookup.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6144
6145
    /**
     * Gets the substring before the last occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // UTF-8 uses the "mb_" functions directly, everything else goes through the class helpers
        $offset = $isUtf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6176
6177
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * Delegates to "UTF8::stristr()" and maps a "false" result (as well as an
     * empty $str or $needle) to an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6213
6214
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * Delegates to "UTF8::strrichr()" and maps a "false" result (as well as an
     * empty $str or $needle) to an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6245
6246
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string is returned for an empty $str or when $n is not positive.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // fast path: use "mb_substr()" directly for UTF-8
        return (string) \mb_substr($str, -$n);
    }
6269
6270
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned unchanged. Longer
     * strings are cut so that the kept part plus $strAddOn has $length
     * characters; when $strAddOn alone is longer than $length, only
     * $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length to the substring call: it would cut
            // from the end of the string instead of limiting the result.
            $keep = \max(0, $length - (int) \mb_strlen($strAddOn));

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $keep) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamp as above, for the generic encoding path
        $keep = \max(0, $length - (int) self::strlen($strAddOn));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6307
6308
    /**
     * Limit the number of characters in a string without cutting the last word in half.
     *
     * - strings that fit into $length are returned unchanged
     * - if the character at position "$length - 1" is a space, the string is cut right before it
     * - otherwise the first $length characters are taken and the last
     *   (possibly incomplete) space-separated word is dropped
     * - if no word is left after that, the first "$length - 1" characters are used
     *
     * In every truncated case $strAddOn is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            $str = \mb_substr($str, 0, $length);
        } else {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
            }

            $str = self::substr($str, 0, $length, $encoding);
            if ($str === false) {
                return '' . $strAddOn;
            }
        }

        // drop the last (possibly incomplete) word of the truncated string
        $words = \explode(' ', $str);
        \array_pop($words);
        $withoutLastWord = \implode(' ', $words);

        if ($withoutLastWord !== '') {
            return $withoutLastWord . $strAddOn;
        }

        // there was only one word: fall back to a hard cut
        if ($isUtf8) {
            return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
        }

        return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
    }
6372
6373
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start until
     * the first difference (or the end of the shorter string).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            $pos = 0;
            while ($pos < $limit) {
                $char = self::substr($str, $pos, 1, $encoding);

                if ($char === false || $char !== self::substr($otherStr, $pos, 1, $encoding)) {
                    break;
                }

                $prefix .= $char;
                ++$pos;
            }

            return $prefix;
        }

        // fast path: use the "mb_" functions directly for UTF-8
        $limit = (int) \min(
            \mb_strlen($str),
            \mb_strlen($otherStr)
        );

        $pos = 0;
        while ($pos < $limit) {
            $char = \mb_substr($str, $pos, 1);

            if ($char === false || $char !== \mb_substr($otherStr, $pos, 1)) {
                break;
            }

            $prefix .= $char;
            ++$pos;
        }

        return $prefix;
    }
6431
6432
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest common substring, or an empty string if there is none
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // checked after the normalization, so e.g. an alias of UTF-8 can use the fast path
        $useMb = $encoding === 'UTF-8';

        // Split both strings into characters once, instead of calling the
        // substring function again for every cell of the comparison table.
        $strChars = [];
        for ($i = 0; $i < $strLength; ++$i) {
            $strChars[] = $useMb
                ? \mb_substr($str, $i, 1)
                : self::substr($str, $i, 1, $encoding);
        }

        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[] = $useMb
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the longest common run found so far
        $end = 0; // (1-based) position in $str where that run ends

        // Only the previous row of the table is ever read, so two rows are enough.
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $strChars[$i - 1];
            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    $run = $previousRow[$j - 1] + 1;

                    // strictly greater: on ties the first occurrence wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($useMb) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6516
6517
    /**
     * Returns the longest run of characters that both $str and $otherStr end with.
     *
     * The strings are compared character by character from their last
     * character backwards until the first mismatch.
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the common suffix, or an empty string if there is none
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str, $encoding),
                \mb_strlen($otherStr, $encoding)
            );

            $offset = 1;
            while ($offset <= $limit) {
                $current = \mb_substr($str, -$offset, 1);

                if ($current === false || $current !== \mb_substr($otherStr, -$offset, 1)) {
                    break;
                }

                // Walking backwards, so every new character goes in front.
                $suffix = $current . $suffix;
                ++$offset;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str, $encoding),
            self::strlen($otherStr, $encoding)
        );

        $offset = 1;
        while ($offset <= $limit) {
            $current = self::substr($str, -$offset, 1, $encoding);

            if ($current === false || $current !== self::substr($otherStr, -$offset, 1, $encoding)) {
                break;
            }

            // Walking backwards, so every new character goes in front.
            $suffix = $current . $suffix;
            ++$offset;
        }

        return $suffix;
    }
6578
6579
    /**
     * Checks whether $str matches a regular expression.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * (UTF-8) modifier, so it must be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @return bool true if the pattern matches, false otherwise
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error;
        // only a real match counts as true.
        return \preg_match($regex, $str) === 1;
    }
6591
6592
    /**
     * Tells whether a character exists at the given index.
     *
     * Negative offsets count from the last character of the string
     * (-1 is the last character). Implements part of the ArrayAccess
     * interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        if ($offset < 0) {
            // A negative offset may reach back at most $charCount characters.
            return \abs($offset) <= $charCount;
        }

        return $offset < $charCount;
    }
6614
6615
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check must use the same encoding as the lookup below,
        // otherwise length and index refer to different character counts.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6644
6645
    /**
     * Pad a string to the given length (counted in characters) with another string.
     *
     * The input is returned unchanged if $pad_length is 0, if $pad_string is
     * empty or if $pad_length is smaller than the length of $str.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of "left", "right", "both"
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Anything that is not already an integer must be one of the textual aliases.
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // Plain "UTF-8" goes straight to mbstring; every other encoding name
        // is normalized and handled by the class' own helpers.
        $useMbString = $encoding === 'UTF-8';
        if ($useMbString) {
            $str_length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        if ($pad_length < $str_length) {
            return $str;
        }

        // Repeats the pad string $repetitions times and cuts the result to $length characters.
        $buildPadding = static function (int $repetitions, int $length) use ($pad_string, $useMbString, $encoding): string {
            $repeated = \str_repeat($pad_string, $repetitions);

            if ($useMbString) {
                return (string) \mb_substr($repeated, 0, $length);
            }

            return (string) self::substr($repeated, 0, $length, $encoding);
        };

        $diff = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            // With an odd difference the extra character goes to the right side.
            $leftLength = (int) \floor($diff / 2);
            $rightLength = (int) \ceil($diff / 2);

            $pre = $buildPadding($leftLength, $leftLength);
            $post = $buildPadding($rightLength, $rightLength);

            return $pre . $str . $post;
        }

        $ps_length = $useMbString
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);

        $padding = $buildPadding((int) \ceil($diff / $ps_length), $diff);

        if ($pad_type === \STR_PAD_LEFT) {
            return $padding . $str;
        }

        // STR_PAD_RIGHT and every unknown integer value pad on the right.
        return $str . $padding;
    }
6807
6808
    /**
     * Pads both sides of the string until it has the given length.
     * Shortcut for str_pad() with a pad type of STR_PAD_BOTH.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6827
6828
    /**
     * Pads the beginning of the string until it has the given length.
     * Shortcut for str_pad() with a pad type of STR_PAD_LEFT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6847
6848
    /**
     * Pads the end of the string until it has the given length.
     * Shortcut for str_pad() with a pad type of STR_PAD_RIGHT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6867
6868
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        $filtered = self::filter($str);

        return \str_repeat($filtered, $multiplier);
    }
6892
6893
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
6938
6939
    /**
     * Replaces $search with $replacement, but only if $str starts with $search.
     *
     * An empty $search never matches a prefix; in that case $replacement is
     * appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // Also covers an empty $str, where this yields just $replacement.
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Byte-wise prefix comparison; a $str shorter than $search cannot match.
        if (\strncmp($str, $search, $searchLength) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $searchLength);
    }
6970
6971
    /**
     * Replaces $search with $replacement, but only if $str ends with $search.
     *
     * An empty $search never matches a suffix; in that case $replacement is
     * appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // Also covers an empty $str, where this yields just $replacement.
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Guard the length first: a $search longer than $str can never be a
        // suffix, and a search offset outside of $str is an error for the
        // native string functions (ValueError as of PHP 8).
        if (
            $searchLength <= \strlen($str)
            &&
            \substr($str, -$searchLength) === $search
        ) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
7002
7003
    /**
     * Replace only the first occurrence of "$search" in "$subject" with "$replace".
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $position = self::strpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, $searchLength);
    }
7032
7033
    /**
     * Replace only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strrpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, $searchLength);
    }
7064
7065
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Plain "UTF-8" goes straight to mbstring; every other encoding name
        // is normalized and handled by the class' own helpers.
        $useMbString = $encoding === 'UTF-8';
        if ($useMbString) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // Shuffle the character positions, then rebuild the string in that order.
        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        $shuffledStr = '';

        // Iterate by value: the index is only read, and a by-reference loop
        // variable would stay bound to the last array element afterwards.
        foreach ($indexes as $index) {
            $tmpSubStr = $useMbString
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding);

            if ($tmpSubStr !== false) {
                $shuffledStr .= $tmpSubStr;
            }
        }

        return $shuffledStr;
    }
7111
7112
    /**
     * Returns the part of $str that begins at index $start and stops before
     * index $end. Without $end, everything from $start to the end of the
     * string is extracted. A negative $end is counted from the end of the
     * string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // Plain "UTF-8" goes straight to mbstring; every other encoding name
        // is normalized and handled by the class' own helpers.
        $useMbString = $encoding === 'UTF-8';
        if (!$useMbString) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end === null) {
            // No end index: the full string length is always enough to reach the end.
            $length = $useMbString
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
        } elseif ($end >= 0) {
            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        } else {
            // Negative end index: translate it into a length relative to $start.
            $totalLength = $useMbString
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            $length = $totalLength + $end - $start;
        }

        if ($useMbString) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7161
7162
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become underscores, every character matched by
     * the conversion pattern (numbers, upper-case letters) is prefixed with
     * an underscore, and runs of underscores are collapsed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $snake = \str_replace('-', '_', self::normalize_whitespace($str));

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * @param string[] $matches
         *
         * @return string
         */
        $separate = static function (array $matches) use ($encoding): string {
            $token = $matches[1];

            // A token that survives the int round-trip is a plain digit:
            // it is surrounded by underscores and otherwise kept as it is.
            if ((string) (int) $token === $token) {
                return '_' . $token . '_';
            }

            $lowered = $encoding === 'UTF-8'
                ? \mb_strtolower($token)
                : self::strtolower($token, $encoding);

            return '_' . $lowered;
        };

        $snake = (string) \preg_replace_callback('/([\\p{N}|\\p{Lu}])/u', $separate, $snake);

        // The patterns are applied one after another, in this order.
        $patterns = [
            '/\\s+/u',           // convert spaces to "_"
            '/^\\s+|\\s+$/u', // trim leading & trailing spaces
            '/_+/',                 // remove double "_"
        ];
        $replacements = ['_', '', '_'];

        $snake = (string) \preg_replace($patterns, $replacements, $snake);

        // trim leading & trailing "_" + whitespace
        $withoutUnderscores = \trim($snake, '_');

        return \trim($withoutUnderscores);
    }
7226
7227
    /**
     * Sort all characters according to code points.
     *
     * The string is turned into its code points via self::codepoints(),
     * sorted, and converted back with self::string().
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // Flipping twice drops duplicate values.
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7252
7253
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is given, every element is split on its own (recursively) and
     * the array is returned with each value replaced by its split result.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; an empty array for a non-positive
     *               $length or an empty input string.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // split each element on its own, keeping the original keys
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: pick the characters one by one via "mb_substr"
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: let PCRE collect every character in one pass
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand;
            // bytes that do not start / complete a valid sequence are skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1-byte sequence (0xxxxxxx)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2-byte sequence (110xxxxx 10xxxxxx)
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3-byte sequence (1110xxxx 10xxxxxx 10xxxxxx)
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // group the single characters into chunks of "$length" and glue each chunk together
            $ret = \array_chunk($ret, $length);

            return \array_map(
                // the chunk is taken by value: "array_map" does not pass references,
                // a by-reference parameter triggers a warning on PHP 8
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7392
7393
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings; an empty array if $limit is 0 or if the split itself fails
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // "mb_split" reports failure with false, which must not leak out of
            // a method declared to return an array
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first "$limit" parts and drops the rest
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // ask for one part more than requested: "preg_split" puts the unsplit
        // remainder into the last part, which is thrown away again below
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): the pattern is escaped via "preg_quote" here, so this branch
        // matches it literally, while the "mb_split" branch above treats it as a
        // regular expression - confirm which behavior is intended.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7454
7455
    /**
     * Check if the string starts with the given substring.
     *
     * - case-sensitive, byte-wise comparison
     * - an empty $needle always matches, an empty $haystack only matches an empty $needle
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // compare only the leading bytes instead of searching the whole haystack
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7475
7476
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty list of $substrings never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $prefix) {
            if (self::str_starts_with($str, $prefix)) {
                return true;
            }
        }

        return false;
    }
7504
7505
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if the input or the separator is empty, or if the
     * separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // continue right behind the separator
            $start = $position + (int) self::strlen($separator, $encoding);

            return (string) \mb_substr($str, $start, null, $encoding);
        }

        // UTF-8: use the native "mb_*" functions directly
        $position = \mb_strpos($str, $separator);
        if ($position === false) {
            return '';
        }

        $start = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }
7544
7545
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * Returns an empty string if the input or the separator is empty, or if the
     * separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // continue right behind the separator
            $start = $position + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $start, null, $encoding);
        }

        // UTF-8: use the native "mb_*" functions directly
        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        $start = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }
7584
7585
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * Returns an empty string if the input or the separator is empty, or if the
     * separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            return (string) self::substr($str, 0, $position, $encoding);
        }

        // UTF-8: use the native "mb_*" functions directly
        $position = \mb_strpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // everything up to (but not including) the separator
        return (string) \mb_substr($str, 0, $position);
    }
7628
7629
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string if the input or the separator is empty, or if the
     * separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // the encoding name is normalized only for the final "substr" call
            $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $position, $normalizedEncoding);
        }

        // UTF-8: use the native "mb_*" functions directly
        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // everything up to (but not including) the last separator
        return (string) \mb_substr($str, 0, $position);
    }
7671
7672
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * With "$beforeNeedle === false" the result starts at the needle itself
     * (same semantics as "mb_strstr"). An empty string is returned if the input
     * or the needle is empty, or if the needle was not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 goes straight to "mb_strstr", every other encoding through the wrapper
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7716
7717
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * With "$beforeNeedle === false" the result starts at the needle itself
     * (same semantics as "mb_strrchr"). An empty string is returned if the input
     * or the needle is empty, or if the needle was not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 goes straight to "mb_strrchr", every other encoding through the wrapper
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7761
7762
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7774
7775
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of the word lower-cased. A "word" is every run of non-whitespace
     * characters. Also accepts an array, $ignore, allowing you to list words
     * that are returned untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        // the two well-known names are used as-is, everything else gets normalized
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only used when no language specific
        // handling and no length-preserving conversion was requested
        $nativeCasing = $lang === null && $tryToKeepStringLength === false;

        $titleizeWord = static function (array $match) use ($ignore, $nativeCasing, $encoding, $lang, $tryToKeepStringLength): string {
            $word = $match[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($nativeCasing !== true) {
                $lowered = self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );

                return self::ucfirst(
                    $lowered,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper(\mb_substr($word, 0, 1))
                       . \mb_strtolower(\mb_substr($word, 1));
            }

            return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
                   . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
        };

        return (string) \preg_replace_callback('/([^\\s]+)/u', $titleizeWord, $str);
    }
7850
7851
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are appended to the built-in list of "small
     * words" and end up inside the regular expressions unescaped.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $builtInSmallWords = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        $minorWordsRx = \implode('|', \array_merge($builtInSmallWords, $ignore));
        $possessiveRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case character is lower-cased completely, first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        /**
         * Upper-cases the first letter of capture group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $capitalizeFirstGroup = static function (array $matches) use ($encoding): string {
            return self::str_upper_first($matches[1], $encoding);
        };

        /**
         * Keeps capture group 1 and upper-cases the first letter of capture group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $capitalizeSecondGroup = static function (array $matches) use ($encoding): string {
            return $matches[1] . self::str_upper_first($matches[2], $encoding);
        };

        /**
         * Handles one word of the main substitution.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $convertWord = static function (array $matches) use ($encoding): string {
            if ($matches[2]) {
                // preserve URLs, domains, emails and file paths
                $word = $matches[2];
            } elseif ($matches[3]) {
                // lower-case small words
                $word = self::strtolower($matches[3], $encoding);
            } elseif ($matches[4]) {
                // capitalize word w/o internal caps
                $word = self::str_upper_first($matches[4], $encoding);
            } else {
                // preserve other kinds of word (iPhone)
                $word = $matches[5];
            }

            // preserve leading and trailing underscores
            return $matches[1] . $word . $matches[6];
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $possessiveRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $minorWordsRx . ' ) ' . $possessiveRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $possessiveRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $possessiveRx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            $convertWord,
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $minorWordsRx . ' ) \\b # ...followed by small word
                     ~uxi',
            $capitalizeSecondGroup,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $minorWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $capitalizeFirstGroup,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $minorWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $capitalizeFirstGroup,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for "…" although its inline
        // comment talks about a hyphen - confirm which one is intended.
        return (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $minorWordsRx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $capitalizeSecondGroup,
            $str
        );
    }
8020
8021
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big number and written in base 2,
     * without leading zeros (an empty string gives "0").
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Convert digit by digit: every hex digit is exactly four bits.
        // "base_convert" is not used for the whole value because it loses
        // precision on large numbers, which corrupted every input longer than
        // a few bytes.
        /** @noinspection OffsetOperationsInspection */
        $hex = $value[1];
        $hexLength = \strlen($hex);

        $bits = '';
        for ($i = 0; $i < $hexLength; ++$i) {
            $bits .= \str_pad(\decbin((int) \hexdec($hex[$i])), 4, '0', \STR_PAD_LEFT);
        }

        // same shape as before: no leading zeros, "0" for a zero / empty value
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8039
8040
    /**
     * Split a string into an array of lines.
     *
     * NOTE(review): the separator is "one or two CR / LF characters", so two
     * consecutive "\n" are consumed as a single separator - confirm that
     * swallowing such an empty line is intended.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "nothing to split": no lines at all, or one empty line
        $nothing = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // no filtering requested: hand out the raw lines
        $filterRequested = $removeShortValues !== null || $removeEmptyValues !== false;
        if (!$filterRequested) {
            return $lines;
        }

        return self::reduce_string_array(
            $lines,
            $removeEmptyValues,
            $removeShortValues
        );
    }
8078
8079
    /**
     * Convert a string into an array of words.
     *
     * The string is split on its words with the words themselves kept in the
     * result, so without any filtering the text between the words (which may be
     * empty) and the words alternate in the returned array.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // result for "nothing to split": no words at all, or one empty string
        $nothing = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        // character class of everything that counts as part of a word
        $wordChars = self::rxClass($charList, '\pL');

        $pieces = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $nothing;
        }

        // no filtering requested: hand out the raw pieces
        $filterRequested = $removeShortValues !== null || $removeEmptyValues !== false;
        if (!$filterRequested) {
            return $pieces;
        }

        $words = self::reduce_string_array(
            $pieces,
            $removeEmptyValues,
            $removeShortValues
        );

        // make sure that every remaining value is a string
        foreach ($words as $key => $word) {
            $words[$key] = (string) $word;
        }

        return $words;
    }
8126
8127
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are handed over unchanged, the result is returned as-is.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8142
8143
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: If there is no room left for any char of $str (e.g. $substring is longer than
     *       $length or $length is negative), only $substring is returned.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // the string already fits: nothing to truncate, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // a negative length would make "mb_substr()" cut chars from the end of the string
            // (and the result would exceed the desired length), so keep only the substring
            if ($length <= 0) {
                return $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same guard as in the UTF-8 branch: never pass a negative length to "substr"
        if ($length <= 0) {
            return $substring;
        }

        return (string) self::substr($str, 0, $length, $encoding) . $substring;
    }
8200
8201
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: Only the space char (" ") is used as word boundary.
     *
     * @param string $str                             <p>The input string.</p>
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // reserve room for the substring that is appended at the end
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            $truncated = \mb_substr($str, 0, $length);
            if ($truncated === false) {
                return '';
            }

            // a space directly behind the cut means that no word was split
            $nextSpacePos = \mb_strpos($str, ' ', $length - 1);
            if ($nextSpacePos !== $length) {
                // otherwise go back to the last space within the truncated part
                $lastSpacePos = \mb_strrpos($truncated, ' ', 0);
                $mayDropSingleWord = $nextSpacePos !== false && $ignoreDoNotSplitWordsForOneWord === false;

                if ($lastSpacePos !== false || $mayDropSingleWord) {
                    $truncated = (string) \mb_substr($truncated, 0, (int) $lastSpacePos);
                }
            }

            return $truncated . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // reserve room for the substring that is appended at the end
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $nextSpacePos = self::strpos($str, ' ', $length - 1, $encoding);
        if ($nextSpacePos !== $length) {
            // otherwise go back to the last space within the truncated part
            $lastSpacePos = self::strrpos($truncated, ' ', 0, $encoding);
            $mayDropSingleWord = $nextSpacePos !== false && $ignoreDoNotSplitWordsForOneWord === false;

            if ($lastSpacePos !== false || $mayDropSingleWord) {
                $truncated = (string) self::substr($truncated, 0, (int) $lastSpacePos, $encoding);
            }
        }

        return $truncated . $substring;
    }
8295
8296
    /**
     * Returns the string delimited by underscores.
     *
     * INFO: This is "UTF8::str_delimit()" with "_" as delimiter; the result is documented
     *       as lowercase and trimmed, with underscores inserted before uppercase characters
     *       (except the first character) and in place of spaces as well as dashes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8310
8311
    /**
     * Returns an UpperCamelCase version of the supplied string.
     *
     * INFO: The string is first passed to "UTF8::str_camelize()" and the result is then passed
     *       to "UTF8::ucfirst()"; $cleanUtf8, $lang and $tryToKeepStringLength are only used
     *       for the "UTF8::ucfirst()" call.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }
8333
8334
    /**
     * Alias of "UTF8::ucfirst()": all arguments are passed through unchanged.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Default: false</p>
     * @param string|null $lang                  [optional] <p>Default: null</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Default: false</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
8356
8357
    /**
     * Counts number of words in the UTF-8 string.
     *
     * INFO: The words are taken from the result of "UTF8::str_to_words()", where every second
     *       value (odd keys) is a word.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string, or the words for $format 1 and 2
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // every other format value falls back to "count only"
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // $format === 2: the key is the offset of the word (measured via "UTF8::strlen()")
        $offset = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $partCount; $index += 2) {
            $words[$offset] = $parts[$index];
            // move on by the word itself and by the separator that follows it
            $offset += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
8394
8395
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(): both strings are passed through
     *       "UTF8::strtocasefold()" before they are compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8416
8417
    /**
     * Alias of "UTF8::strstr()": all arguments are passed through unchanged.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $cleanUtf8     [optional] <p>Default: false</p>
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8439
8440
    /**
     * Case-sensitive string comparison.
     *
     * INFO: Both strings are normalized to NFD before they are compared byte-wise, so
     *       canonically equivalent strings (e.g. composed vs. decomposed chars) are equal.
     *       A string that can not be normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false on failure and with "strict_types" this
        // would be a TypeError in "\strcmp()", so fall back to the raw input string
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8462
8463
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $charList <p>The chars that end the initial segment.</p>
     * @param int|null $offset   [optional] <p>Start of the part of $str that is examined.</p>
     * @param int|null $length   [optional] <p>Length of the part of $str that is examined.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask the whole string is the "initial segment"
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        // reduce the string to the requested part
        if ($offset !== null || $length !== null) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                $segment = self::substr($str, $start, $length, $encoding);
            } elseif ($length === null) {
                $segment = \mb_substr($str, $start);
            } else {
                $segment = \mb_substr($str, $start, $length);
            }

            if ($segment === false) {
                return 0;
            }

            $str = $segment;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $matches = [];
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        if (!\preg_match($pattern, $str, $matches)) {
            // no char from the mask was found: the whole string counts
            return (int) self::strlen($str, $encoding);
        }

        $prefixLength = self::strlen($matches[1], $encoding);

        return $prefixLength === false ? 0 : $prefixLength;
    }
8526
8527
    /**
     * Alias of "UTF8::stristr()": all arguments are passed through unchanged.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $cleanUtf8     [optional] <p>Default: false</p>
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8549
8550
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * INFO: Every value of the array is converted via "UTF8::chr()" and the results are
     *       concatenated in the order of the array.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $chars = \array_map(
            [
                self::class,
                'chr',
            ],
            $array
        );

        return \implode('', $chars);
    }
8572
8573
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * INFO: The string is compared against every key (the BOM byte sequence) of "self::$BOM".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // only the keys are needed, so there is no reason to iterate
        // the shared static array by reference
        foreach (\array_keys(self::$BOM) as $bomString) {
            if (\strpos($str, $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8593
8594
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // null is never passed to the native function, the argument is left out instead
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
8630
8631
    /**
     * Strip all whitespace characters.
     *
     * INFO: Every run of chars that matches the regex class "[[:space:]]" (in unicode mode)
     *       is removed, e.g. spaces, tabs and newline characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string without whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8648
8649
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * INFO: "mb_stripos()" is used if mbstring is available, otherwise the fallbacks are
     *       "grapheme_stripos()" (UTF-8 only), the native "stripos()" (ASCII only) and
     *       finally "UTF8::strpos()" on the case-folded strings.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        if ($offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8724
8725
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end,
     * the needle is searched case insensitive.
     *
     * INFO: "mb_stristr()" is used if mbstring is available, otherwise the fallbacks are
     *       "grapheme_stristr()" (UTF-8 only), the native "stristr()" (ASCII only) and
     *       finally a case insensitive regex.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // the needle can be empty after the cleaning; the strict comparison is used on purpose,
        // because a loose "!$needle" check is also true for the valid needle "0"
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the needle
        //

        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the part in front of the needle and return the rest of the haystack
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8808
8809
    /**
     * Get the string length, not the byte-length!
     *
     * INFO: "mb_strlen()" is used if mbstring is available, otherwise the fallbacks are
     *       "strlen()" (CP850 / ASCII), "iconv_strlen()", "grapheme_strlen()" (UTF-8 only),
     *       "strlen()" (ASCII only strings) and finally a regex that matches every char.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        // warn (but go on) if nothing is available that knows the requested encoding
        $encodingIsUnsupported = $encoding !== 'UTF-8'
                                 &&
                                 self::$SUPPORT['mbstring'] === false
                                 &&
                                 self::$SUPPORT['iconv'] === false;
        if ($encodingIsUnsupported) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        \preg_match_all('/./us', $str, $chars);

        $charCount = \count($chars[0]);

        // no match at all for a non-empty string is reported as failure
        return $charCount === 0 ? false : $charCount;
    }
8923
8924
    /**
     * Get string length in byte.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int the number of bytes
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        // otherwise the native function is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
8944
8945
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(): both strings are passed through
     *       "UTF8::strtocasefold()" before they are compared via "UTF8::strnatcmp()".
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8966
8967
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp(): both strings are passed through
     *       "UTF8::strtonatfold()" before they are compared via the native "strnatcmp()".
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical strings do not need to be folded
        if ($str1 === $str2) {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
8993
8994
    /**
     * Compare the first n characters of two strings, ignoring case.
     *
     * INFO: both inputs are case-folded via UTF8::strtocasefold() and then
     *       handed to UTF8::strncmp().
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Charset that is forwarded to the case-folding step.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
        $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($foldedFirst, $foldedSecond, $len);
    }
9021
9022
    /**
     * Compare the first n characters of two strings.
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Charset used to cut both strings to <i>len</i> characters.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            // other charsets: cut via the encoding-aware helper
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            // UTF-8: call mb_substr() directly
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head1, $head2);
    }
9057
9058
    /**
     * Search a string for the first occurrence of any character out of a set.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>The characters to look for (case sensitive).</p>
     *
     * @return false|string the rest of the haystack, starting at the first character found,
     *                      or false if none is found or one of the inputs is empty
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // build a character-class regex from the list and look for the first hit
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // locate the matched character and return everything from there on
        $start = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $start);
    }
9080
9081
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, plain strpos() (for "CP850" / "ASCII"), intl, iconv,
     * plain strpos() (ASCII-only input) and finally a pure PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
     *                              In the pure PHP fallback a negative offset is counted from the end
     *                              of the haystack and the result is still relative to its start.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // Translate "count from the end" into an absolute start index, so the
            // position returned below stays relative to the start of the original
            // haystack (an offset beyond the string start is clamped to 0).
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position inside the remaining haystack ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character position by measuring the prefix
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9228
9229
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if one of the
     *                   inputs is empty, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When the "mbstring_func_overload" support flag is set, search via
        // mb_strpos() with an 8-bit charset instead of strpos().
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9258
9259
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift the
            // positions reported by "mb_" / "iconv_" functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) binary || ascii only
        $isSingleByteCharset = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($isSingleByteCharset && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) iconv or vanilla php: both only look for the first character of the needle
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9381
9382
    /**
     * Reverses characters order in the string.
     *
     * INFO: the string is passed through UTF8::emoji_encode() before and
     *       UTF8::emoji_decode() after the reversal.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($i = (int) self::strlen($str, $encoding) - 1; $i >= 0; --$i) {
                $char = self::substr($str, $i, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($i = (int) \grapheme_strlen($str) - 1; $i >= 0; --$i) {
                $char = \grapheme_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } else {
            for ($i = (int) \mb_strlen($str) - 1; $i >= 0; --$i) {
                $char = \mb_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
9434
9435
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift the
            // positions reported by "mb_" / "iconv_" functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) vanilla php: only the first character of the needle is searched
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9510
9511
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, plain strripos() (for "CP850" / "ASCII"), intl,
     * plain strripos() (ASCII-only input) and finally case-folding both
     * strings and calling UTF8::strrpos().
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // a non-negative integer needle is converted via self::chr(),
        // because iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }
        $isUtf8 = $encoding === 'UTF-8';

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        // 2) binary || ascii only
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strripos()" can't handle other encodings or a negative offset
        if ($isUtf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        // 4) ascii only
        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        // 5) vanilla php: case-fold both sides, then do a case-sensitive search
        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
    }
9621
9622
    /**
     * Finds the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or one of the
     *                   inputs is empty
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When the "mbstring_func_overload" support flag is set, search via
        // mb_strripos() with an 8-bit charset instead of strripos().
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9653
9654
    /**
     * Find position of last occurrence of a string in a string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, plain strrpos() (for "CP850" / "ASCII"), intl,
     * plain strrpos() (ASCII-only input) and finally a pure PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the searched region; the requested encoding
        // is forwarded to the helpers (as UTF8::strpos() does), so that character
        // offsets are counted in that charset.
        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // drop the last |$offset| characters, positions still start at 0
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position inside the remaining haystack ...
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character position by measuring the prefix
        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9791
9792
    /**
     * Find the byte position of the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, or if one of the
     *                   inputs is empty, it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When the "mbstring_func_overload" support flag is set, search via
        // mb_strrpos() with an 8-bit charset instead of strrpos().
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9822
9823
    /**
     * Measure the leading run of characters of a string that all belong to a given mask.
     *
     * @param string $str      <p>The string to inspect.</p>
     * @param string $mask     <p>The set of allowed characters.</p>
     * @param int    $offset   [optional] <p>If set, only the sub-string starting here is inspected.</p>
     * @param int    $length   [optional] <p>If set, only this many characters of the sub-string are inspected.</p>
     * @param string $encoding [optional] <p>The charset; normalized unless it is "UTF-8" or "CP850".</p>
     *
     * @return false|int
     *                   The length of the matching leading segment,<br>
     *                   <strong>0</strong> if nothing matches or if the (sub-)string or the mask is empty
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        $needsNormalizing = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needsNormalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Narrow the input down to the requested window first.
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // The mask is turned into a regex character class and anchored to the start of the string.
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9866
9867
    /**
     * Return the part of the haystack that starts at (or ends before) the first occurrence of the needle.
     *
     * The work is delegated to the first usable backend, in this order: mbstring, plain "strstr()" for
     * the "CP850" / "ASCII" charsets, intl ("grapheme_strstr()", UTF-8 only), plain "strstr()" for
     * ASCII-only input and finally a regular-expression based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first occurrence
     *                              of the needle is returned (the needle itself is excluded).
     *                              </p>
     * @param string $encoding      [optional] <p>The charset; normalized unless it is "UTF-8" or "CP850".</p>
     * @param bool   $cleanUtf8     [optional] <p>Pass both strings through UTF8::clean() first.</p>
     *
     * @return false|string
     *                      The sub-string,<br>or <strong>false</strong> if the needle is not found
     *                      or if the haystack or the needle is an empty string
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        $hasEmptyOperand = $haystack === '' || $needle === '';
        if ($hasEmptyOperand) {
            return false;
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would lead to wrong positions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) binary or ascii charset: the byte-wise function is sufficient
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // every backend below only understands UTF-8, so warn about other charsets
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl ("grapheme_strstr()" can't handle other encodings)
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($viaIntl !== false) {
                return $viaIntl;
            }
        }

        // 4) ascii-only input
        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // 5) vanilla php: lazily capture everything in front of the first needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        $prefix = $match[1] ?? null;
        if ($prefix === null) {
            return false;
        }

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
9975
9976
    /**
     * Byte-oriented search for the first occurrence of a string within another string.
     *
     * @param string $haystack      <p>The string that is searched.</p>
     * @param string $needle        <p>The string to look for inside the haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the returned portion of the haystack:<br>
     *                              <b>true</b>: everything from the beginning up to the first occurrence
     *                              of the needle,<br>
     *                              <b>false</b>: everything from the first occurrence of the needle
     *                              to the end.
     *                              </p>
     *
     * @return false|string
     *                      The selected portion of the haystack,<br>
     *                      or <strong>false</strong> if the needle is not found or if the haystack
     *                      or the needle is an empty string
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        $hasEmptyOperand = $haystack === '' || $needle === '';
        if ($hasEmptyOperand) {
            return false;
        }

        // With mbstring function overloading active the "mb_" variant is available,
        // so it is called explicitly with the 8-bit "CP850" charset.
        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        return $overloadActive
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
10011
10012
    /**
     * Unicode transformation of a string for case-less matching.
     *
     * The string is first passed through UTF8::fixStrCaseHelper() and is then converted to
     * lowercase or uppercase, depending on the "$lower" flag.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() first.</p>
     * @param string      $encoding  [optional] <p>The charset.</p>
     * @param string|null $lang      [optional] <p>The language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>
     *                               <b>true</b>, fold to lowercase (default)<br>
     *                               anything else, fold to uppercase, which is the better choice for
     *                               some languages
     *                               </p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        $toLower = $lower === true;
        $isPlainUtf8 = $lang === null && $encoding === 'UTF-8';

        // Plain UTF-8 without a language hint: go straight to the "mb_" functions.
        if ($isPlainUtf8) {
            return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // Otherwise let the charset- and language-aware helpers do the conversion.
        return $toLower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10064
10065
    /**
     * Convert a string to lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string to convert; it is cast to string first.</p>
     * @param string      $encoding              [optional] <p>The charset for the "mb_" function.</p>
     * @param bool        $cleanUtf8             [optional] <p>Pass the string through UTF8::clean() first.</p>
     * @param string|null $lang                  [optional] <p>
     *                                           The language for special cases: az, el, lt, tr.
     *                                           Needs intl; without it a warning is triggered and the
     *                                           language is ignored.
     *                                           </p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>The lowercased string.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        $isPlainUtf8 = $lang === null && $encoding === 'UTF-8';
        if ($isPlainUtf8) {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] !== true) {
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            } else {
                // the list of transliterator ids is loaded lazily and only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $transliteratorId = $lang . '-Lower';
                $isKnownId = \in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true);
                if (!$isKnownId) {
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // fall back to the language independent transliterator
                    $transliteratorId = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($transliteratorId, $str);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
10134
10135
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased; it is cast to string first.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Pass the string through UTF8::clean() first.</p>
     * @param string|null $lang                  [optional] <p>
     *                                           Set the language for special cases: az, el, lt, tr.
     *                                           Needs intl; without it a warning is triggered and the
     *                                           language is ignored.
     *                                           </p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of transliterator ids is loaded lazily and only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, the language itself is the unknown part
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // fall back to the language independent transliterator
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10204
10205
    /**
     * Translate characters or replace sub-strings.
     *
     * With a non-empty "$to", both "$from" and "$to" are split via UTF8::str_split(), the longer list
     * is cut down to the size of the shorter one and the resulting pairs are handed to "strtr()".
     * With an empty "$to", a string "$from" is removed from the input via "str_replace()", while an
     * array "$from" is handed to "strtr()" as-is.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the replacement pairs cannot be built
     *
     * @return string
     *                A copy of str in which all occurrences of each character in from are translated
     *                to the corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // translating something into itself changes nothing
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // both lists must have the same size, so the surplus of the longer one is dropped
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // the pairs are kept in their own variable, so that the error message below
            // can still show the list that was actually used as "from"
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            return \strtr($str, $pairs);
        }

        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10252
10253
    /**
     * Determine the display width of a string.
     *
     * With mbstring the result comes from "mb_strwidth()". Without it, every character from a fixed
     * list of code point ranges counts as two columns and every other character as one column.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>The charset; normalized unless it is "UTF-8" or "CP850".</p>
     * @param bool   $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() first.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        // preferred: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // vanilla php: the regex below works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the double-width characters and remember how many of them were removed
        $wideCount = 0;
        $narrowRest = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        return 2 * $wideCount + (int) self::strlen($narrowRest, 'UTF-8');
    }
10303
10304
    /**
     * Get part of a string.
     *
     * The work is delegated to the first usable backend, in this order: mbstring, plain "substr()" for
     * the "CP850" / "ASCII" charsets, intl ("grapheme_substr()", UTF-8 and non-negative offsets only),
     * iconv (non-negative lengths only), plain "substr()" for ASCII-only input and finally a fallback
     * that splits the string into an array of characters.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str       <p>The string being checked.</p>
     * @param int    $offset    <p>The first position used in str.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum length of the returned string;
     *                          <b>null</b> means "up to the end of the string".
     *                          </p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() first.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      input and for a <i>length</i> of 0. <b>FALSE</b> is returned by the
     *                      non-mbstring fallback if the length of the string can't be determined.
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // Hand over to mbstring directly: calling self::substr() again with the very same
            // arguments would end up in this branch again and never terminate. A length of
            // null makes "mb_substr()" extract everything up to the end of the string.
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string: the offset points exactly behind the last character
        // ("$length === 0" already returned above, so only null is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        } else {
            $length = (int) $length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10464
10465
    /**
     * Compare a window of one string with another string.
     *
     * If an offset or a length is given, "$str1" is reduced to the requested window and "$str2" is
     * cut down to the length of that window, before both strings are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison; <b>null</b> means "up to the
     *                                     end of the main string".</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // The window was extracted with "$encoding", so its length has to be measured
                // with the same charset; counting it as UTF-8 would cut "$str2" at the wrong place.
                $windowLength = (int) self::strlen($str1, $encoding);
                $str2 = (string) self::substr($str2, 0, $windowLength, $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10518
10519
    /**
     * Count how often a sub-string occurs inside a string.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack  <p>The string to search in.</p>
     * @param string $needle    <p>The sub-string to search for.</p>
     * @param int    $offset    [optional] <p>The position at which the counting starts.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum number of characters after the offset that are searched;
     *                          <b>null</b> means "up to the end of the haystack".
     *                          </p>
     * @param string $encoding  [optional] <p>The charset; normalized unless it is "UTF-8" or "CP850".</p>
     * @param bool   $cleanUtf8 [optional] <p>Pass both strings through UTF8::clean() first.</p>
     *
     * @return false|int
     *                   The number of occurrences,<br>
     *                   <strong>0</strong> for a length of 0,<br>
     *                   or <strong>false</strong> if the haystack or the needle is an empty string
     *                   or if the length of the haystack can't be determined
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $hasEmptyOperand = $haystack === '' || $needle === '';
        if ($hasEmptyOperand) {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would lead to wrong positions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // vanilla php: every regex match of the quoted needle is one occurrence
        $pattern = '/' . \preg_quote($needle, '/') . '/us';
        \preg_match_all($pattern, $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10600
10601
    /**
     * Count how often a substring occurs in a string, working on bytes.
     *
     * @param string $haystack <p>The string being checked.</p>
     * @param string $needle   <p>The string being found.</p>
     * @param int    $offset   [optional] <p>The byte offset where to start counting.</p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring. Without "mbstring.func_overload" the value is handed to
     *                         the native "substr_count()" as-is.
     *                         </p>
     *
     * @return false|int
     *                   The number of times the needle occurs in the haystack, <strong>0</strong> if
     *                   one of both strings is empty, or <strong>false</strong> if the requested
     *                   window can not be resolved in the "mbstring.func_overload" code path
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // no function overloading: the native function already counts bytes
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // with function overloading the search window is cut out by hand ...
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $windowIsEmpty = $length !== 0
                             && $offset !== 0
                             && ($length + $offset) <= 0;
            // output from "substr_count()" have changed in PHP 7.1
            if ($windowIsEmpty && Bootup::is_php('7.1') === false) {
                return false;
            }

            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // ... and "mb_" is available if overload is used, so use it with an 8-BIT charset
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
10678
10679
    /**
     * Count the occurrences of $substring in $str.
     *
     * The comparison is case-sensitive unless $caseSensitive is set to false; in
     * that case both strings are case-converted before they are compared.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int <p>The number of occurrences, 0 if one of both strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        // fast path: plain UTF-8 does not need an explicit encoding argument
        if ($encoding === 'UTF-8') {
            if (!$caseSensitive) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10725
10726
    /**
     * Strip a prefix ($needle) from the beginning of $haystack, ignoring the case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10750
10751
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string $str    <p>The string being checked.</p>
     * @param int    $offset <p>The first byte position used in str.</p>
     * @param int    $length [optional] <p>The maximum length (in bytes) of the returned string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>The native "substr()" decides what is
     *                      returned if <i>str</i> is shorter than <i>offset</i>: <b>FALSE</b>
     *                      before PHP 8, an empty string since PHP 8.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // No length given: leave the argument out instead of passing a magic
        // maximum, so that strings longer than 2147483647 bytes are not truncated.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10782
10783
    /**
     * Strip a suffix ($needle) from the end of $haystack, ignoring the case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // number of leading characters that survive the cut
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10807
10808
    /**
     * Strip a prefix ($needle) from the beginning of $haystack (case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10832
10833
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, the replacement is done for every element and
     * $replacement, $offset and $length may be arrays as well (one value per
     * element) or scalars (the same value for every element). The given
     * $encoding is used for every element.
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given for a single string, the replacing
     *                                     ends at the end of string. If it is not given for an array of strings,
     *                                     a length of zero is used for every element, which inserts the
     *                                     replacement at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is a string
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries fall back to 0)
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (non-integer entries fall back to the number of strings)
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // the encoding: "array_map()" only forwards the arrays it is given,
            // so the encoding is repeated once per string, otherwise every
            // element would silently be processed with the default "UTF-8"
            $encodings = \array_fill(0, $num, $encoding);

            // recursive call
            return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length, $encodings);
        }

        // a single string can only take a single replacement: use the first one
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // resolve a negative / missing / too large length
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // the replaced portion must not reach beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into characters and splice the replacement in
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10983
10984
    /**
     * Strip a suffix ($needle) from the end of $haystack (case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check: does the haystack end with the needle at all?
        $tail = \substr($haystack, -\strlen($needle));
        if ($tail !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
11025
11026
    /**
     * Returns a case swapped version of the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // byte-wise XOR: where the input equals its lower-case form the
        // upper-case bytes remain, and the other way around
        return (string) ($lower ^ $upper ^ $str);
    }
11053
11054
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function ("mb_strlen()" / "iconv()") exists
     * although the matching extension ("mbstring" / "iconv") is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false
                                && \function_exists('mb_strlen');

        $iconvIsPolyfilled = \extension_loaded('iconv') === false
                             && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
11077
11078
    /**
     * Replace every tab character with $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces per tab. Default: 4</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
11096
11097
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without $tryToKeepStringLength the work is done by
     * "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // language specific rules or length preservation requested
        if ($lang !== null || $tryToKeepStringLength !== false) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
    }
11134
11135
    /**
     * Deprecated alias, forwards all arguments to "UTF8::to_ascii()".
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11151
11152
    /**
     * Deprecated alias, forwards the argument to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11166
11167
    /**
     * Deprecated alias, forwards the argument to "UTF8::to_latin1()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_latin1()
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
11181
11182
    /**
     * Deprecated alias, forwards the argument to "UTF8::to_utf8()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11196
11197
    /**
     * Convert a string into ASCII.
     *
     * Pure ASCII input is returned unchanged. Otherwise the string is cleaned,
     * optionally transliterated via PHP-Intl and finally every remaining
     * non-ASCII character is replaced via per-"bank" lookup tables (one table
     * per upper part of the code point, loaded lazily and cached in a static).
     * Characters without a table entry are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // cache of the already loaded lookup tables: bank => [low byte => ASCII replacement]
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        // NOTE(review): self::$ORD is used below as a "byte character => byte value" table - confirm against the data file
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into single characters
        \preg_match_all('/.|[^\x00]$/us', $str, $ar);
        $chars = $ar[0];

        /** @noinspection ForeachSourceInspection */
        foreach ($chars as &$c) {
            // The code point has to be reset for every character, otherwise a
            // character that can not be decoded would silently reuse the code
            // point of the previous one instead of becoming $unknown.
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // 2-byte sequence
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3-byte sequence
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4-byte sequence
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        // 5-byte sequence
                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            // 6-byte sequence
                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // lead bytes 254 / 255 are never decoded into a code point
            if ($ordC0 === 254 || $ordC0 === 255) {
                $c = $unknown;

                continue;
            }

            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // the upper part of the code point selects the lookup table ("bank") ...
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember the missing table as empty, so it is not loaded again
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            // ... and the lowest byte selects the entry within that table
            $newchar = $ord & 255;

            // debugging hint: dump sprintf('x%02x', $bank), $c, $ord and $newchar
            // here to find the data file of a wrong or missing mapping
            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {
                $c = $unknown;
            }
        }
        // break the reference of the loop variable
        unset($c);

        return \implode('', $chars);
    }
11358
11359
    /**
     * Interpret a value as boolean.
     *
     * The value is cast to a string first. The words "true", "on", "yes", "1"
     * and "false", "off", "no", "0" are recognised (also in other letter case),
     * numeric strings are true if they are greater than zero and everything
     * else is true if it is not empty after trimming.
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $knownValues = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // try the value as given first, then its lower-cased form
        foreach ([$str, \strtolower($str)] as $candidate) {
            if (isset($knownValues[$candidate])) {
                return $knownValues[$candidate];
            }
        }

        if (\is_numeric($str)) {
            return ((float) $str + 0) > 0;
        }

        return (bool) \trim($str);
    }
11400
11401
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Every character except ASCII letters, digits, ".", "-", whitespace and the fallback char is
     * removed, whitespace runs become the fallback char, repeated fallback chars are collapsed and
     * the fallback char is trimmed from both ends.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe
     *                                  characters are simply removed. If true, the string is passed through
     *                                  "UTF8::str_transliterate()" first.</p>
     * @param string $fallback_char     <p>Replacement for whitespace; may be an empty string, in which case
     *                                  whitespace is removed.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\\.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
            '/[\\s]+/u',                                             // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty fallback char would build the invalid pattern "/[]+/u"; preg_replace() then
        // returns null for the whole call and the result collapses to an empty string.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/u';    // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        if ($fallback_char === '') {
            // nothing to trim
            return $string;
        }

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11436
11437
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept); any other value is
     * cast to string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str <p>The string, or array of strings, to convert.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $item) {
                $str[$key] = self::to_iso8859($item);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11461
11462
    /**
     * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged.
     *
     * @param string|string[] $str <p>The string, or array of strings, to convert.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11475
11476
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX", including surrogate pairs) and, on request,
     * html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $item) {
                $str[$key] = self::to_utf8($item, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';

        for ($pos = 0; $pos < $length; ++$pos) {
            $lead = $str[$pos];

            // 7-bit bytes never need conversion
            if ($lead < "\x80") {
                $result .= $lead;

                continue;
            }

            // how many continuation bytes does this lead byte announce?
            if ($lead >= "\xC0" && $lead <= "\xDF") {
                $tail = 1; // looks like 2 bytes UTF8
            } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
                $tail = 2; // looks like 3 bytes UTF8
            } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
                $tail = 3; // looks like 4 bytes UTF8
            } else {
                $tail = 0; // stray continuation byte (0x80-0xBF) or 0xF8-0xFF
            }

            // the sequence only counts as UTF-8 if every announced byte exists and is 0x80-0xBF
            $looksLikeUtf8 = $tail > 0 && $pos + $tail < $length;
            for ($offset = 1; $looksLikeUtf8 && $offset <= $tail; ++$offset) {
                $next = $str[$pos + $offset];
                $looksLikeUtf8 = $next >= "\x80" && $next <= "\xBF";
            }

            if ($looksLikeUtf8) {
                // yeah, almost sure it's UTF8 already - copy the whole sequence
                $result .= \substr($str, $pos, $tail + 1);
                $pos += $tail;
            } else {
                // not valid UTF8 - convert the single byte
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $codePoint = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $codePoint = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($codePoint < 0x80) {
                    return (string) self::chr($codePoint);
                }

                if ($codePoint < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($codePoint >> 6))
                           . (string) self::chr(0x80 | ($codePoint & 0x3F));
                }

                return self::decimal_to_chr($codePoint);
            },
            $result
        );

        // preg_replace_callback() reports an error with null
        if ($result === null) {
            return '';
        }

        if ($decodeHtmlEntityToUtf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
11609
11610
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly: a plain truthiness check treats the char list "0" as "not given"
        // and would strip whitespace instead of zeros.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11643
11644
    /**
     * Makes string's first char uppercase.
     *
     * The first character is upper-cased with "mb_strtoupper()" unless a language or the
     * keep-length option is given; in that case "UTF8::strtoupper()" is used. The rest of the
     * string is appended unchanged.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain mbstring call is enough as long as no special handling was requested
        $plainMb = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first = (string) \mb_substr($str, 0, 1);

            if ($plainMb === true) {
                return \mb_strtoupper($first) . $rest;
            }

            return self::strtoupper($first, $encoding, false, $lang, $tryToKeepStringLength) . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($plainMb === true) {
            $first = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($first, $encoding) . $rest;
        }

        $first = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper($first, $encoding, false, $lang, $tryToKeepStringLength) . $rest;
    }
11713
11714
    /**
     * Alias of "UTF8::ucfirst()"; only these three arguments are forwarded.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
11729
11730
    /**
     * Uppercase for all words in the string.
     *
     * Plain ASCII input without exceptions and without an extra charlist is handed to PHP's own
     * "ucwords()"; everything else is split with "UTF8::str_to_words()" and every word that is not
     * listed in $exceptions goes through "UTF8::ucfirst()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // strict comparison: the loose "!$str" also threw away the valid input "0"
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // compare against '' explicitly, so that a charlist of "0" is not mistaken for "no charlist"
        $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // empty chunks have nothing to uppercase ("0" is skipped as well, it has no case)
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // drop the reference left behind by the by-reference loop
        unset($word);

        return \implode('', $words);
    }
11791
11792
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible, i.e. until a pass no longer changes
     *                             the string.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers only the simple UTF-8 fix is applied
        $needsDecoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needsDecoding = true;

                break;
            }
        }

        if ($needsDecoding === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entityDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = self::fix_simple_utf8(\urldecode($entityDecoded));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
11849
11850
    /**
11851
     * Returns an array that maps url-encoded Windows-1252 bytes ("%XX") to their UTF-8 characters.
11852
     *
11853
     * @return string[]
11854
     *
11855
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
11856
     */
11857
    public static function urldecode_fix_win1252_chars(): array
11858
    {
11859
        return [
11860 2
            '%20' => ' ',
11861
            '%21' => '!',
11862
            '%22' => '"',
11863
            '%23' => '#',
11864
            '%24' => '$',
11865
            '%25' => '%',
11866
            '%26' => '&',
11867
            '%27' => "'",
11868
            '%28' => '(',
11869
            '%29' => ')',
11870
            '%2A' => '*',
11871
            '%2B' => '+',
11872
            '%2C' => ',',
11873
            '%2D' => '-',
11874
            '%2E' => '.',
11875
            '%2F' => '/',
11876
            '%30' => '0',
11877
            '%31' => '1',
11878
            '%32' => '2',
11879
            '%33' => '3',
11880
            '%34' => '4',
11881
            '%35' => '5',
11882
            '%36' => '6',
11883
            '%37' => '7',
11884
            '%38' => '8',
11885
            '%39' => '9',
11886
            '%3A' => ':',
11887
            '%3B' => ';',
11888
            '%3C' => '<',
11889
            '%3D' => '=',
11890
            '%3E' => '>',
11891
            '%3F' => '?',
11892
            '%40' => '@',
11893
            '%41' => 'A',
11894
            '%42' => 'B',
11895
            '%43' => 'C',
11896
            '%44' => 'D',
11897
            '%45' => 'E',
11898
            '%46' => 'F',
11899
            '%47' => 'G',
11900
            '%48' => 'H',
11901
            '%49' => 'I',
11902
            '%4A' => 'J',
11903
            '%4B' => 'K',
11904
            '%4C' => 'L',
11905
            '%4D' => 'M',
11906
            '%4E' => 'N',
11907
            '%4F' => 'O',
11908
            '%50' => 'P',
11909
            '%51' => 'Q',
11910
            '%52' => 'R',
11911
            '%53' => 'S',
11912
            '%54' => 'T',
11913
            '%55' => 'U',
11914
            '%56' => 'V',
11915
            '%57' => 'W',
11916
            '%58' => 'X',
11917
            '%59' => 'Y',
11918
            '%5A' => 'Z',
11919
            '%5B' => '[',
11920
            '%5C' => '\\',
11921
            '%5D' => ']',
11922
            '%5E' => '^',
11923
            '%5F' => '_',
11924
            '%60' => '`',
11925
            '%61' => 'a',
11926
            '%62' => 'b',
11927
            '%63' => 'c',
11928
            '%64' => 'd',
11929
            '%65' => 'e',
11930
            '%66' => 'f',
11931
            '%67' => 'g',
11932
            '%68' => 'h',
11933
            '%69' => 'i',
11934
            '%6A' => 'j',
11935
            '%6B' => 'k',
11936
            '%6C' => 'l',
11937
            '%6D' => 'm',
11938
            '%6E' => 'n',
11939
            '%6F' => 'o',
11940
            '%70' => 'p',
11941
            '%71' => 'q',
11942
            '%72' => 'r',
11943
            '%73' => 's',
11944
            '%74' => 't',
11945
            '%75' => 'u',
11946
            '%76' => 'v',
11947
            '%77' => 'w',
11948
            '%78' => 'x',
11949
            '%79' => 'y',
11950
            '%7A' => 'z',
11951
            '%7B' => '{',
11952
            '%7C' => '|',
11953
            '%7D' => '}',
11954
            '%7E' => '~',
11955
            '%7F' => '',
11956
            '%80' => '`',
11957
            '%81' => '',
11958
            '%82' => '‚',
11959
            '%83' => 'ƒ',
11960
            '%84' => '„',
11961
            '%85' => '…',
11962
            '%86' => '†',
11963
            '%87' => '‡',
11964
            '%88' => 'ˆ',
11965
            '%89' => '‰',
11966
            '%8A' => 'Š',
11967
            '%8B' => '‹',
11968
            '%8C' => 'Œ',
11969
            '%8D' => '',
11970
            '%8E' => 'Ž',
11971
            '%8F' => '',
11972
            '%90' => '',
11973
            '%91' => '‘',
11974
            '%92' => '’',
11975
            '%93' => '“',
11976
            '%94' => '”',
11977
            '%95' => '•',
11978
            '%96' => '–',
11979
            '%97' => '—',
11980
            '%98' => '˜',
11981
            '%99' => '™',
11982
            '%9A' => 'š',
11983
            '%9B' => '›',
11984
            '%9C' => 'œ',
11985
            '%9D' => '',
11986
            '%9E' => 'ž',
11987
            '%9F' => 'Ÿ',
11988
            '%A0' => '',
11989
            '%A1' => '¡',
11990
            '%A2' => '¢',
11991
            '%A3' => '£',
11992
            '%A4' => '¤',
11993
            '%A5' => '¥',
11994
            '%A6' => '¦',
11995
            '%A7' => '§',
11996
            '%A8' => '¨',
11997
            '%A9' => '©',
11998
            '%AA' => 'ª',
11999
            '%AB' => '«',
12000
            '%AC' => '¬',
12001
            '%AD' => '',
12002
            '%AE' => '®',
12003
            '%AF' => '¯',
12004
            '%B0' => '°',
12005
            '%B1' => '±',
12006
            '%B2' => '²',
12007
            '%B3' => '³',
12008
            '%B4' => '´',
12009
            '%B5' => 'µ',
12010
            '%B6' => '¶',
12011
            '%B7' => '·',
12012
            '%B8' => '¸',
12013
            '%B9' => '¹',
12014
            '%BA' => 'º',
12015
            '%BB' => '»',
12016
            '%BC' => '¼',
12017
            '%BD' => '½',
12018
            '%BE' => '¾',
12019
            '%BF' => '¿',
12020
            '%C0' => 'À',
12021
            '%C1' => 'Á',
12022
            '%C2' => 'Â',
12023
            '%C3' => 'Ã',
12024
            '%C4' => 'Ä',
12025
            '%C5' => 'Å',
12026
            '%C6' => 'Æ',
12027
            '%C7' => 'Ç',
12028
            '%C8' => 'È',
12029
            '%C9' => 'É',
12030
            '%CA' => 'Ê',
12031
            '%CB' => 'Ë',
12032
            '%CC' => 'Ì',
12033
            '%CD' => 'Í',
12034
            '%CE' => 'Î',
12035
            '%CF' => 'Ï',
12036
            '%D0' => 'Ð',
12037
            '%D1' => 'Ñ',
12038
            '%D2' => 'Ò',
12039
            '%D3' => 'Ó',
12040
            '%D4' => 'Ô',
12041
            '%D5' => 'Õ',
12042
            '%D6' => 'Ö',
12043
            '%D7' => '×',
12044
            '%D8' => 'Ø',
12045
            '%D9' => 'Ù',
12046
            '%DA' => 'Ú',
12047
            '%DB' => 'Û',
12048
            '%DC' => 'Ü',
12049
            '%DD' => 'Ý',
12050
            '%DE' => 'Þ',
12051
            '%DF' => 'ß',
12052
            '%E0' => 'à',
12053
            '%E1' => 'á',
12054
            '%E2' => 'â',
12055
            '%E3' => 'ã',
12056
            '%E4' => 'ä',
12057
            '%E5' => 'å',
12058
            '%E6' => 'æ',
12059
            '%E7' => 'ç',
12060
            '%E8' => 'è',
12061
            '%E9' => 'é',
12062
            '%EA' => 'ê',
12063
            '%EB' => 'ë',
12064
            '%EC' => 'ì',
12065
            '%ED' => 'í',
12066
            '%EE' => 'î',
12067
            '%EF' => 'ï',
12068
            '%F0' => 'ð',
12069
            '%F1' => 'ñ',
12070
            '%F2' => 'ò',
12071
            '%F3' => 'ó',
12072
            '%F4' => 'ô',
12073
            '%F5' => 'õ',
12074
            '%F6' => 'ö',
12075
            '%F7' => '÷',
12076
            '%F8' => 'ø',
12077
            '%F9' => 'ù',
12078
            '%FA' => 'ú',
12079
            '%FB' => 'û',
12080
            '%FC' => 'ü',
12081
            '%FD' => 'ý',
12082
            '%FE' => 'þ',
12083
            '%FF' => 'ÿ',
12084
        ];
12085
    }
12086
12087
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place, byte by byte: 2-byte sequences are mapped to a single byte
     * when the code point is below 256, every other multi-byte sequence becomes "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>If true, the original string is returned whenever the decoded result
     *                              is not shorter (measured with "UTF8::strlen()") than the input.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // $i is the read position, $j the write position; $j never overtakes $i
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // A 2-byte lead as the very last byte has no continuation byte: emit the
                    // placeholder instead of reading past the end of the string.
                    if ($i + 1 >= $len) {
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // not representable in ISO-8859-1
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes hold decoded output
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12156
12157
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Thin wrapper around PHP's "utf8_encode()" that turns a false result into an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12181
12182
    /**
     * Fix utf8-win1252 chars by delegating to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12195
12196
    /**
     * Returns the class' table of utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12209
12210
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. If the string has more than $limit words it
     * is cut after the last allowed word (trailing whitespace removed) and $strAddOn is appended;
     * otherwise it is returned unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer; values below 1 give an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace followed by at most $limit "word + whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        if (isset($matches[0]) === false) {
            return $str;
        }

        $head = $matches[0];

        // the match covers the whole string: nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $strAddOn;
    }
12237
12238
    /**
     * Wraps a string to a given number of characters
     *
     * The string is split into characters with "UTF8::str_split()" and mirrored into a
     * single-byte mask ("?" per character, " " per space, "#" per existing break). The mask is
     * wrapped with PHP's own "wordwrap()" and the resulting "#" positions are replayed onto the
     * real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string if $str or $break
     *                is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // build the character list and its single-byte mask side by side
        $chars = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                // an existing break between two segments
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $wrapped = '';
        $charPos = 0;   // next unread entry of $chars
        $breakPos = -1; // position of the current "#" in the mask
        $maskPos = -1;  // last mask position that was handled
        $mask = \wordwrap($mask, $width, '#', $cut);

        $maskLength = \mb_strlen($mask);
        while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
            // copy everything in front of the break
            for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
                $wrapped .= $chars[$charPos];
                unset($chars[$charPos++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($maskPos > $maskLength) {
                    break 2;
                }
            }

            // a break that replaced a space (or an original break) consumes that character
            if (
                $break === $chars[$charPos]
                ||
                $chars[$charPos] === ' '
            ) {
                unset($chars[$charPos++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($breakPos > $maskLength) {
                break;
            }
        }

        // whatever was not consumed belongs to the last line
        return $wrapped . \implode('', $chars);
    }
12324
12325
    /**
     * Splits the string by "$delimiter" (newlines by default), word-wraps every
     * piece on its own via self::wordwrap() and glues the pieces back together.
     *
     * @param string      $str           <p>The input string.</p>
     * @param int         $width         [optional] <p>The column width.</p>
     * @param string      $break         [optional] <p>The string that is inserted at every line break.</p>
     * @param bool        $cut           [optional] <p>
     *                                   Passed through to self::wordwrap(): when true, words longer
     *                                   than $width are broken apart.
     *                                   </p>
     * @param bool        $addFinalBreak [optional] <p>
     *                                   When true, one $break is appended to the end of the result.
     *                                   </p>
     * @param string|null $delimiter     [optional] <p>
     *                                   The string to split by and to re-join with. With null the input
     *                                   is split on "\r\n", "\r" or "\n" and re-joined with "\n".
     *                                   </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $addFinalBreak = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrappedLines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrappedLines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $addFinalBreak ? $break : '';

        return \implode($glue, $wrappedLines) . $suffix;
    }
12376
12377
    /**
     * Provides the table of Unicode white-space characters kept by this class.
     *
     * @return string[] white-space characters, keyed by their numeric code point
     */
    public static function ws(): array
    {
        $whitespaceTable = self::$WHITESPACE;

        return $whitespaceTable;
    }
12386
12387
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * @param string $str
     * @param bool   $useLower     <p>
     *                             When exactly true, the "upper" entries are replaced by their
     *                             "lower" counterparts; otherwise "lower" is replaced by "upper".
     *                             </p>
     * @param bool   $fullCaseFold <p>
     *                             When truthy, the "caseFolding_full" data table is applied as well
     *                             (in the same direction).
     *                             </p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        // loaded lazily, once per request
        static $FULL_CASE_FOLD = null;

        $toLower = $useLower === true;

        $search = $toLower ? self::$COMMON_CASE_FOLD['upper'] : self::$COMMON_CASE_FOLD['lower'];
        $replace = $toLower ? self::$COMMON_CASE_FOLD['lower'] : self::$COMMON_CASE_FOLD['upper'];
        $str = \str_replace($search, $replace, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is the search side when lowering, index 1 when not
        if ($toLower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12428
12429
    /**
     * Includes "<this directory>/data/<$file>.php" and hands back whatever that file returns.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12443
12444
    /**
     * Includes "<this directory>/data/<$file>.php" when that file exists.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed the value returned by the included file, or false if the file does not exist
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12462
12463
    /**
     * Fills the static emoji caches on first use.
     *
     * @return true|null true when the caches were built by this call, null when they already existed
     */
    private static function initEmojiData()
    {
        // already initialized by an earlier call
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the table by key length (in bytes), longest key first
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder per key, built from the key's CRC32 checksum and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12493
12494
    /**
     * Tells whether the mbstring function overloading of the string functions is switched on.
     *
     * INFO: the INI directive "mbstring.func_overload" is deprecated since PHP 7.2
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        // without the constant there is nothing that could be overloaded
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($overloadFlags & \MB_OVERLOAD_STRING);
    }
12511
12512
    /**
     * Filters a list of strings and returns the survivors re-indexed from 0.
     *
     * @param array    $strings
     * @param bool     $removeEmptyValues <p>When true, strings that are empty after trim() are dropped.</p>
     * @param int|null $removeShortValues <p>
     *                                    When not null, strings whose mb_strlen() is less than or equal
     *                                    to this value are dropped.
     *                                    </p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = [];

        foreach ($strings as $candidate) {
            $tooShort = $removeShortValues !== null
                        &&
                        \mb_strlen($candidate) <= $removeShortValues;
            if ($tooShort) {
                continue;
            }

            $blank = $removeEmptyValues === true
                     &&
                     \trim($candidate) === '';
            if ($blank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
12546
12547
    /**
     * Builds a regular-expression fragment that matches any one of the characters
     * in $s (plus whatever the pre-built class body $class matches).
     *
     * The fragment is meant for patterns delimited by "/" (that delimiter is what
     * gets escaped). Results are memoized per ($class, $s) pair.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class [optional] <p>
     *                      A character-class body that is used as-is (NOT escaped),
     *                      the characters of $s are added to it.
     *                      </p>
     *
     * @return string a "[...]" character class, or a "(?:...|...)" group when some element
     *                of $s can not be placed inside a character class; an empty string
     *                when both $s and $class are empty
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        // Two-level cache: a concatenated key ($s . $class) would be ambiguous,
        // e.g. ('\\', 's') and ('', '\\s') would share one entry.
        if (isset($RX_CLASS_CACHE[$class][$s])) {
            return $RX_CLASS_CACHE[$class][$s];
        }

        // index 0 collects the character-class body, further entries are alternatives
        $classArray = [$class];

        // Iterate by value and with a name of its own: the split result is a temporary
        // (nothing to reference) and the parameter $s must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading hyphen is a literal inside a character class, not a range
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: may be a regex meta character, so escape it
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character of three or more bytes: goes into the class unescaped
                $classArray[0] .= $char;
            } else {
                // more than one character: can only be expressed as an alternative
                $classArray[] = $char;
            }
        }

        // Strict comparison on purpose: a class consisting of "0" is not empty.
        if ($classArray[0] !== '') {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$class][$s] = $return;

        return $return;
    }
12595
12596
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names by $delimiter and upper-cases the first character of every part via
     * self::ucfirst(), except for parts that
     *   - are exactly one of the known lower-case name particles ("de", "van", "von", ...), or
     *   - start with one of the known prefixes ("mc", "d'", "al-", ...), or
     *   - (only when the delimiter is "-") start with one of the name particles.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The string the parts are split by and re-joined with.</p>
     * @param string $encoding  [optional] <p>Encoding handed to self::strpos() for the prefix checks.</p>
     *
     * @return string the re-joined names; an empty string if the split failed
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // init
        $namesArray = \explode($delimiter, $names);

        if ($namesArray === false) {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched. This list does not depend on the
        // individual part, so it is built once instead of once per part.
        $protectedBeginnings = $specialCases['prefixes'];
        if ($delimiter === '-') {
            $protectedBeginnings = \array_merge($specialCases['names'], $protectedBeginnings);
        }

        // Write back through the index instead of a by-reference loop variable,
        // so that no reference into $namesArray outlives the loop.
        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $isProtected = false;
            foreach ($protectedBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $isProtected = true;

                    // one hit is enough
                    break;
                }
            }

            if ($isProtected) {
                continue;
            }

            $namesArray[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $namesArray);
    }
12686
12687
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and every run of
     * non-spacing marks (\p{Mn}) is removed afterwards, e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null the folded string, or null if the input could not be
     *                     normalized or the replacement failed (e.g. invalid UTF-8)
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports an error with a non-string result; that value must not
        // reach preg_replace(), which only accepts strings (TypeError under strict_types).
        if (!\is_string($decomposed)) {
            return null;
        }

        return \preg_replace(
            '/\p{Mn}+/u',
            '',
            $decomposed
        );
    }
12702
12703
    /**
     * Converts one input unit to UTF-8: either via the Windows-1252 lookup table
     * or, if the table has no entry for it, as a two byte sequence.
     *
     * The three lookup tables ("ord", "chr", "win1252_to_utf8") are loaded on first use.
     * NOTE(review): presumably "ord" maps a single byte to its ordinal and "chr" maps an
     * ordinal back to a single byte - verify against the data files.
     *
     * @param int|string $input <p>Must be a key of the "ord" table.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return '' . self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Explicit (int) cast: "/" yields a float, and using a fractional float as an
        // array offset relies on an implicit truncation that is deprecated since PHP 8.1.
        // The resulting offset is the same as before.
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";

        // Both operands are strings, so "&" and "|" work byte-wise here (intended, these are
        // not meant to be the logical operators): keep the low six bits, then set the high bit.
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
12737
12738
    /**
     * Rewrites "%uXXXX" escapes (three or four hex digits) into the HTML
     * numeric entity form "&#xXXXX;".
     *
     * @param string $str
     *
     * @return string the rewritten string, or the unchanged input if it contains no such escape
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to rewrite
        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
12752
}
12753