Passed
Push — master ( 01248a...1a3996 )
by Lars
03:29
created

UTF8::regex_replace()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 15
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3.0123

Importance

Changes 0
Metric Value
cc 3
eloc 8
nc 4
nop 5
dl 0
loc 15
ccs 8
cts 9
cp 0.8889
crap 3.0123
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Bom => Byte-Length
     *
     * Maps every known byte order mark to its length in bytes. Next to each raw
     * BOM the table also lists the variant labelled "as WINDOWS-1252", i.e. the
     * UTF-8 bytes of the characters the BOM bytes show up as in that charset.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // Written as byte escapes on purpose: the literal characters are invisible in many
        // tools and easily get stripped, which would leave an empty-string key behind.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two blank characters in the UTF-32 "WINDOWS-1252" keys are kept exactly
        // as found; presumably they stand for the two NUL bytes of the BOM - confirm.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $UTF8_MSWORD;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $BROKEN_UTF8_FIX;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $WIN1252_TO_UTF8;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $CHR;
219
220
    /**
     * Creates an instance and triggers the environment detection
     * (see UTF8::checkForSupport()).
     */
    public function __construct()
    {
        // The detection itself is guarded, so constructing several instances is cheap.
        self::checkForSupport();
    }
227
228
    /**
     * Return the character at the specified position: $str[1] like functionality.
     *
     * An empty input string or a negative position yields an empty string.
     *
     * @param string $str <p>A UTF-8 string.</p>
     * @param int    $pos <p>The position of character to return.</p>
     *
     * @return string single multi-byte character
     */
    public static function access(string $str, int $pos): string
    {
        // Nothing can be looked up in an empty string or at a negative position.
        if ($str === '' || $pos < 0) {
            return '';
        }

        $char = self::substr($str, $pos, 1);

        return (string) $char;
    }
248
249
    /**
     * Prepends UTF-8 BOM character to the string and returns the whole string.
     *
     * INFO: If a BOM is already detected by UTF8::string_has_bom(), the input string is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        // Already carries a BOM -> hand the input back untouched.
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
266
267
    /**
     * Changes the case of all keys in an array.
     *
     * Any $case value other than CASE_LOWER / CASE_UPPER is treated as CASE_LOWER.
     *
     * @param array $array <p>The array to work on</p>
     * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                     or <strong>CASE_LOWER</strong> (default)</p>
     *
     * @return array the input values, keyed by the lower- or uppercased keys
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER): array
    {
        // Only an explicit CASE_UPPER uppercases; everything else falls back to lowercase.
        $toUpper = $case === \CASE_UPPER;

        $converted = [];
        foreach ($array as $key => $value) {
            if ($toUpper) {
                $newKey = self::strtoupper($key);
            } else {
                $newKey = self::strtolower($key);
            }

            $converted[$newKey] = $value;
        }

        return $converted;
    }
297
298
    /**
     * Returns the substring between $start and $end, if found, or an empty
     * string. An optional offset may be supplied from which to begin the
     * search for the start string.
     *
     * An empty string is also returned when $end directly follows $start.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
    {
        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        // The wanted content begins right behind the start delimiter.
        $contentPos = $startPos + self::strlen($start, $encoding);

        $endPos = self::strpos($str, $end, $contentPos, $encoding);
        if ($endPos === false || $endPos === $contentPos) {
            return '';
        }

        $content = self::substr($str, $contentPos, $endPos - $contentPos, $encoding);

        return $content === false ? '' : $content;
    }
336
337
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
     *
     * The digits are read as one big-endian number: leading zeros are dropped and
     * the remainder is left-padded to whole octets, so "1010" becomes "\x0a".
     * Characters other than "0" and "1" are ignored. The input is decoded octet by
     * octet, so its length is not limited by float precision.
     *
     * @param mixed $bin <p>A string consisting of the characters "1" and "0".</p>
     *
     * @return string the decoded bytes; an empty string for non-string input,
     *                empty input or an input without any "1" digit
     */
    public static function binary_to_str($bin): string
    {
        if (\is_string($bin) === false || $bin === '') {
            return '';
        }

        // Keep the binary digits only and drop leading zeros (they carry no value).
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to a multiple of 8 so that every chunk is exactly one byte.
        $length = \strlen($bits);
        $bits = \str_repeat('0', (8 - $length % 8) % 8) . $bits;

        $bytes = '';
        foreach (\str_split($bits, 8) as $octet) {
            $bytes .= \chr((int) \bindec($octet));
        }

        return $bytes;
    }
357
358
    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        // The three bytes EF BB BF.
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
369
370
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
384
385
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * Implemented as a one-character UTF8::substr() whose result is cast to string.
     *
     * @param string $str
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
398
399
    /**
     * Returns an array consisting of the characters in the string.
     *
     * Delegates to UTF8::str_split() with a chunk length of 1.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $chars = self::str_split($str, 1);

        return $chars;
    }
410
411
    /**
     * This method will auto-detect your server environment for UTF-8 support
     * and stores the findings in self::$SUPPORT.
     *
     * The detection only runs while the "already_checked_via_portable_utf8" flag
     * is not set; later calls return immediately.
     *
     * INFO: You don't need to run it manually, it will be triggered if it's needed.
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        // Set the flag first, before any of the detection helpers is called.
        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // The transliterator ids are only available with "intl"; otherwise keep an empty list.
        $canListTransliterators = self::$SUPPORT['intl'] === true
                                  &&
                                  \function_exists('transliterator_list_ids') === true;
        /** @noinspection PhpComposerExtensionStubsInspection */
        self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
            ? \transliterator_list_ids()
            : [];

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    }
459
460
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * The code point may be given as an integer or as a string of decimal digits;
     * it is converted to an integer before any lookup, so "8364" and 8364 give the
     * same result. Anything else, as well as values outside of 0 - 0x10FFFF,
     * yields null. Results are cached per code point and encoding.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Accept integers, finite floats and pure digit strings only.
        $isDigitString = \is_string($code_point) === true
                         &&
                         \preg_match('/^[0-9]+$/D', $code_point) === 1;
        $isFiniteFloat = \is_float($code_point) === true
                         &&
                         \is_finite($code_point) === true;
        if (
            \is_int($code_point) === false
            &&
            $isDigitString === false
            &&
            $isFiniteFloat === false
        ) {
            return null;
        }

        // Normalize early: IntlChar::chr() would read a string as a *character*, not as a number.
        $code_point = (int) $code_point;
        if ($code_point < 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point > 0x7F && self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // The byte lookup table is loaded on first use only.
            if (self::$CHR === null) {
                $chrTmp = self::getData('chr');
                if ($chrTmp) {
                    self::$CHR = (array) $chrTmp;
                }
            }

            if ($code_point <= 0x7F) { // use "simple"-char only until "\x80"
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
559
560
    /**
     * Applies callback to all characters of a string.
     *
     * The string is split via UTF8::split() and the pieces are passed to array_map().
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        return \array_map($callback, self::split($str));
    }
574
575
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $chars = self::split($str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Without mbstring function overloading the native strlen() is used directly.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \array_map('\strlen', $chars);
        }

        return \array_map(
            static function ($data) {
                return self::strlen_in_byte($data);
            },
            $chars
        );
    }
610
611
    /**
     * Get a decimal code representation of a specific character.
     *
     * The first byte decides how many bytes are read (1 to 4); the payload bits of
     * the lead byte and of the following bytes are then combined into one number.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $value = self::ord($char[0]);

        // 0xxxxxxx
        if (!($value & 0x80)) {
            return $value;
        }

        $length = 1;
        if (($value & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $value &= ~0xc0;
        } elseif (($value & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $value &= ~0xe0;
        } elseif (($value & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $value &= ~0xf0;
        }

        // 10xxxxxx: append the payload of every following byte
        for ($offset = 1; $offset < $length; ++$offset) {
            $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $value;
    }
649
650
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty input returns an empty string; the entity "&#0;" is treated like
     * an empty character before it is passed to UTF8::ord().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $input = $char === '&#0;' ? '' : $char;

        return self::int_to_hex(self::ord($input), $pfix);
    }
670
671
    /**
     * Alias for "UTF8::chr_to_decimal()": forwards the argument unchanged.
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
684
685
    /**
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
     *
     * The chunks come from UTF8::split() and are joined with $end, so $end is
     * placed between chunks only (not after the last one).
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::split($body, $chunklen);

        return \implode($end, $chunks);
    }
698
699
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in a fixed order: diamond question mark, invisible
     * characters, whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 keeps runs of well-formed sequences; the stray bytes matched by the
        // other groups are dropped, because only "$1" is written back.
        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
766
767
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * Runs UTF8::fix_simple_utf8() first and then UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($str);

        // options for UTF8::clean(), in parameter order
        $removeBom = true;
        $normalizeWhitespace = true;
        $normalizeMsWord = false;
        $keepNonBreakingSpace = true;
        $replaceDiamondQuestionMark = true;
        $removeInvisibleCharacters = true;

        return self::clean(
            $fixed,
            $removeBom,
            $normalizeWhitespace,
            $normalizeMsWord,
            $keepNonBreakingSpace,
            $replaceDiamondQuestionMark,
            $removeInvisibleCharacters
        );
    }
801
802
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is split via UTF8::split() first; every element is then passed to UTF8::ord().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        if (\is_string($arg) === true) {
            $arg = self::split($arg);
        }

        $codePoints = \array_map(
            static function ($char) {
                return self::ord($char);
            },
            $arg
        );

        if (\count($codePoints) === 0) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        // U+xxxx notation, using the default prefix of UTF8::int_to_hex()
        return \array_map(
            static function ($codePoint) {
                return self::int_to_hex($codePoint);
            },
            $codePoints
        );
    }
846
847
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        // First condense every whitespace run, then trim the result.
        $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

        return self::trim($condensed);
    }
862
863
    /**
     * Returns count of characters used in a string.
     *
     * @param string $str       <p>The input string.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(string $str, bool $cleanUtf8 = false): array
    {
        $chars = self::split($str, 1, $cleanUtf8);

        return \array_count_values($chars);
    }
876
877
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern below is replaced
     * with an empty string.
     *
     * @param string $str
     *
     * @return string
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
892
893
    /**
     * Checks whether ctype is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if the "ctype" extension is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $loaded = \extension_loaded('ctype');

        return $loaded;
    }
903
904
    /**
     * Converts an int-value into a character by decoding the numeric
     * html-entity "&#<int>;" via self::html_entity_decode().
     *
     * @param mixed $int <p>The value that is placed into the numeric entity.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numericEntity = '&#' . $int . ';';

        return self::html_entity_decode($numericEntity, \ENT_QUOTES | \ENT_HTML5);
    }
915
916
    /**
     * Decodes a MIME header field.
     *
     * iconv_mime_decode() is used when self::$SUPPORT reports "iconv" as available,
     * otherwise mb_decode_mimeheader() is used.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as given, everything else gets normalized
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($isKnownEncoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['iconv'] !== true) {
            // without iconv: re-encode the input first if a non UTF-8 charset was requested
            $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

            return \mb_decode_mimeheader($input);
        }

        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
946
947
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  Unlike "UTF8::utf8_encode()", this method also tries to fix broken / double encoding,
     *        so it can be called on a string that is already UTF-8.
     *
     * Besides charsets, the values "JSON", "BASE64" and "HTML-ENTITIES" are handled
     * explicitly, both as target ($toEncoding) and as source ($fromEncoding).
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the current encoding is always detected
     *                                       via self::str_detect_encoding();<br> if false, detection only
     *                                       runs when $fromEncoding is empty.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       An empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is "JSON" and self::json_encode() returns false
     *
     * @return string
     */
    public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
    {
        // nothing to do without input or without a target encoding
        if ($toEncoding === '' || $str === '') {
            return $str;
        }

        $targetIsKnown = $toEncoding === 'UTF-8' || $toEncoding === 'CP850';
        if ($targetIsKnown === false) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding) {
            $sourceIsKnown = $fromEncoding === 'UTF-8' || $fromEncoding === 'CP850';
            if ($sourceIsKnown === false) {
                $fromEncoding = self::normalize_encoding($fromEncoding, null);
            }
        }

        // source and target are the same -> nothing to convert
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // "JSON"
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json !== false) {
                return $json;
            }

            throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // "BASE64"
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // "HTML-ENTITIES"
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the current encoding: always in "autodetect"-mode, otherwise only if it is unknown
        $detectedEncoding = false;
        if (!$fromEncoding || $autodetectFromEncoding === true) {
            $detectedEncoding = self::str_detect_encoding($str);
        }

        if ($detectedEncoding === false) {
            if ($autodetectFromEncoding === true) {
                // fallback for the "autodetect"-mode
                return self::to_utf8($str);
            }
        } else {
            $fromEncoding = $detectedEncoding;
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // WINDOWS-1252 / ISO-8859-1 -> UTF-8
        if ($toEncoding === 'UTF-8') {
            if ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'ISO-8859-1') {
                return self::to_utf8($str);
            }
        }

        // WINDOWS-1252 / UTF-8 -> ISO-8859-1
        if ($toEncoding === 'ISO-8859-1') {
            if ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'UTF-8') {
                return self::to_iso8859($str);
            }
        }

        $isBasicTarget = $toEncoding === 'UTF-8'
                         || $toEncoding === 'ISO-8859-1'
                         || $toEncoding === 'WINDOWS-1252';
        if ($isBasicTarget === false && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            // a falsy result falls through to iconv()
            if ($converted) {
                return $converted;
            }
        }

        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        // keep the input if iconv() failed as well
        return $viaIconv !== false ? $viaIconv : $str;
    }
1101
1102
    /**
     * Encodes a string as MIME header field via iconv_mime_encode(), using an empty field name.
     *
     * @param string $str
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
     * @param int    $indent           [optional] <p>Set the max line length (passed as "line-length").</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as given, everything else gets normalized
        $inputIsKnown = $fromCharset === 'UTF-8' || $fromCharset === 'CP850';
        if ($inputIsKnown === false) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        $outputIsKnown = $toCharset === 'UTF-8' || $toCharset === 'CP850';
        if ($outputIsKnown === false) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1142
1143
    /**
     * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
     *
     * The cut positions are looked up with self::strpos() / self::strrpos() for ' ' and '.',
     * and skipped text is marked with $replacerForSkippedText.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // chars that are stripped from the cut edges of the extract
        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round(self::strlen($str, $encoding) / 2, 0);
        }

        // no search-string: only cut the end of the text
        if (empty($search)) {
            if ($length > 0) {
                $stringLength = (int) self::strlen($str, $encoding);
                $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = (int) \min(
                self::strpos($str, ' ', $end, $encoding),
                self::strpos($str, '.', $end, $encoding)
            );

            if ($pos) {
                $strSub = self::substr($str, 0, $pos, $encoding);
                if ($strSub === false) {
                    return '';
                }

                return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
            }

            return $str;
        }

        $wordPos = self::stripos($str, $search, 0, $encoding);
        $halfSide = (int) ($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = self::substr($str, 0, $halfSide, $encoding);
            if ($halfText !== false) {
                $pos_start = (int) \max(
                    self::strrpos($halfText, ' ', 0, $encoding),
                    self::strrpos($halfText, '.', 0, $encoding)
                );
            }
        }

        // Explicit form of the former loose "$wordPos" check: the search-string must
        // have been found (not false) at a position other than the very start (not 0).
        if ($wordPos !== false && $wordPos !== 0 && $halfSide > 0) {
            $offset = $pos_start + $length - 1;
            $realLength = (int) self::strlen($str, $encoding);

            if ($offset > $realLength) {
                $offset = $realLength;
            }

            $pos_end = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                ) - $pos_start;

            if ($pos_end <= 0) {
                // No usable end position: take everything from $pos_start on.
                // The length is measured in $encoding, like every other length in this method.
                $strSub = self::substr($str, $pos_start, $realLength, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
                } else {
                    $extract = '';
                }
            } else {
                $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            }
        } else {
            $offset = $length - 1;
            $trueLength = (int) self::strlen($str, $encoding);

            if ($offset > $trueLength) {
                $offset = $trueLength;
            }

            $pos_end = \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );

            if ($pos_end) {
                $strSub = self::substr($str, 0, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1260
1261
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * NOTE: the filename is passed through filter_var() with FILTER_SANITIZE_STRING before it is used;
     *       per the PHP manual that filter strips tags and encodes quotes, and it is deprecated as of PHP 8.1.
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to file_get_contents() as its "use_include_path" argument.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with stream_context_create.
     *                                        If it is null and $timeout is set, a context with the
     *                                        "http" => "timeout" option is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null is treated as 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that is
     *                                        detected as binary (and is not UTF-16 / UTF-32) will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        An empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // init
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);

        // filter_var() returns false on failure; passing that on would raise a
        // TypeError under strict_types, so report the documented failure value instead
        if ($filename === false) {
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        if (\is_int($maxLength) === true) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 === true) {
            // binary data that is neither UTF-16 nor UTF-32 is returned untouched
            $isPlainBinary = self::is_binary($data, true) === true
                             &&
                             self::is_utf16($data, false) === false
                             &&
                             self::is_utf32($data, false) === false;

            if ($isPlainBinary === false) {
                $data = self::encode('UTF-8', $data, false, $fromEncoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1354
1355
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * The file is read with the native file_get_contents() and the content is
     * checked by self::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1374
1375
    /**
     * Normalizes strings to the given normalization form (default: NFC); arrays and objects
     * are processed recursively, every other type is returned unchanged.
     *
     * For non-ASCII strings that are not normalized yet, \Normalizer::normalize() is used; if that
     * yields no usable result, the string is passed to self::encode('UTF-8', ...) instead.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form [optional] <p>The form passed to \Normalizer.</p>
     * @param string $leading_combining  [optional] <p>Prefix for strings that start with a combining mark.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        if ($type === 'array') {
            foreach ($var as $key => $item) {
                /** @noinspection AlterInForeachInspection */
                $var[$key] = self::filter($item, $normalization_form, $leading_combining);
            }

            return $var;
        }

        if ($type === 'object') {
            foreach ($var as $property => $item) {
                $var->{$property} = self::filter($item, $normalization_form, $leading_combining);
            }

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // only non-ASCII strings need normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        /** @noinspection PhpUndefinedClassInspection */
        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // placeholder, so that the isset() check below passes
            $normalized = '-';
        } else {
            /** @noinspection PhpUndefinedClassInspection */
            $normalized = \Normalizer::normalize($var, $normalization_form);

            $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
                                   &&
                                   isset($normalized[0], $leading_combining[0])
                                   &&
                                   \preg_match('/^\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1440
1441
    /**
     * "filter_input()"-wrapper: the value returned by the native function is passed through self::filter().
     *
     * Gets a specific external variable by name and optionally filters it
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags.
     *                              Only forwarded if it was passed explicitly.
     *                              </p>
     *
     * @return mixed the result of the native filter_input() call, after self::filter()
     */
    public static function filter_input(int $type, string $variable_name, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // forward $options only if the caller really passed a 4th argument
        $value = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($value);
    }
1479
1480
    /**
     * "filter_input_array()"-wrapper: the value returned by the native function is passed through self::filter().
     *
     * Gets external variables and optionally filters them
     *
     * @see  http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments, or an integer holding a filter constant
     *                          (see the PHP manual). Only forwarded if at least two arguments were passed.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed the result of the native filter_input_array() call, after self::filter()
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // with only $type given, let the native function use its own defaults
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
1525
1526
    /**
     * "filter_var()"-wrapper: the value returned by the native function is passed through self::filter().
     *
     * Filters a variable with a specified filter
     *
     * @see  http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags
     *                        (see the PHP manual for the accepted formats).
     *                        Only forwarded if it was passed explicitly.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // for filters that accept options, use this format
     *                        $options = array(
     *                        'options' => array('default' => 3, 'min_range' => 0),
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                        // for filters that only accept flags, you can pass them directly
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                        </code>
     *                        </p>
     *
     * @return mixed the result of the native filter_var() call, after self::filter()
     */
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // forward $options only if the caller really passed a 3rd argument
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1594
1595
    /**
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them.
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>An array with string keys containing the data to filter.</p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments: each key is a variable name, each value is
     *                          either a filter type or an array that optionally specifies the "filter",
     *                          the "flags" and the "options" to apply.
     *                          </p>
     *                          <p>
     *                          It can also be an integer holding a filter constant; then every value of the
     *                          input array is filtered by that filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>Add missing keys as <b>NULL</b> to the return value.</p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on
     *               failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               variable is not set
     */
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // Only pass the optional arguments on when the caller supplied at least
        // "$definition"; otherwise the native defaults are used.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($filtered);
    }
1638
1639
    /**
     * Tells whether the "finfo" class exists in the running PHP installation.
     *
     * @return bool
     *              <strong>true</strong> if the class is available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1649
1650
    /**
     * Returns the leading $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                the leading characters, or an empty string if $n is not positive
     *                or the substring could not be extracted
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n > 0) {
            $head = self::substr($str, 0, $n, $encoding);
            if ($head !== false) {
                return $head;
            }
        }

        // non-positive length, or substr() reported a failure
        return '';
    }
1672
1673
    /**
     * Checks that the string has no more characters than the given limit.
     *
     * @param string $str      the string to be measured
     * @param int    $box_size the maximum number of characters allowed
     *
     * @return bool true if the length of the string is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $length = self::strlen($str);

        return $length <= $box_size;
    }
1685
1686
    /**
     * Try to repair simple cases of broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        // search / replace lists, built on the first non-empty call and reused afterwards
        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($str === '') {
            return '';
        }

        if ($SEARCH_CACHE === null) {
            // load the "utf8_fix" mapping only if nothing has loaded it yet
            self::$BROKEN_UTF8_FIX = self::$BROKEN_UTF8_FIX ?? self::getData('utf8_fix');

            $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
            $REPLACE_CACHE = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
1719
1720
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * @param string|string[] $str a single string, or an array of strings
     *
     * @return string|string[]
     *                         the fixed array if an array was given,
     *                         otherwise the fixed string
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) !== true) {
            $current = (string) $str;

            // Decode and re-encode until a pass no longer changes the string.
            for ($previous = ''; $previous !== $current;) {
                $previous = $current;
                $current = self::to_utf8(
                    self::utf8_decode($current, true)
                );
            }

            return $current;
        }

        // array input: fix every element, keeping its key
        foreach ($str as $key => $value) {
            $str[$key] = self::fix_utf8($value);
        }

        return $str;
    }
1750
1751
    /**
     * Get the text direction of a specific character.
     *
     * If "IntlChar" is reported as supported, its direction code is used first;
     * when that code is in neither of the two known lists, or "IntlChar" is not
     * supported, the decision is made from built-in code point tables.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // any other code: fall through to the code point tables below
        }

        $codePoint = static::chr_to_decimal($char);

        // everything outside of 0x5be - 0x10b7f is treated as left-to-right
        if ($codePoint < 0x5be || $codePoint > 0x10b7f) {
            return 'LTR';
        }

        if ($codePoint <= 0x85e) {
            $rtlSingles = [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ];
            $rtlRanges = [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ];
        } elseif ($codePoint === 0x200f) {
            return 'RTL';
        } elseif ($codePoint >= 0xfb1d) {
            $rtlSingles = [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ];
            $rtlRanges = [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // between 0x85f and 0xfb1c only 0x200f is right-to-left
            return 'LTR';
        }

        if (\in_array($codePoint, $rtlSingles, true)) {
            return 'RTL';
        }

        foreach ($rtlRanges as $range) {
            if ($codePoint >= $range[0] && $codePoint <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
1868
1869
    /**
     * Report which php-features were detected as supported.
     *
     * @param string|null $key
     *
     * @return mixed
     *               the full support-"array", if $key === null<br>
     *               the stored value, if $key is used and set<br>
     *               otherwise <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        // run the detection once, before the support-"array" is read
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($key === null) {
            return self::$SUPPORT;
        }

        // keys that are not set (or hold null) yield null
        return self::$SUPPORT[$key] ?? null;
    }
1895
1896
    /**
     * Guess the file type from the first two bytes of the given content.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The (binary) content to inspect.</p>
     * @param array  $fallback with this keys: 'ext', 'mime', 'type';
     *                         returned as-is when the content is empty, shorter than
     *                         two bytes or does not start with a known signature
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // Known two-byte signatures => [extension, mime-type].
        //
        // The raw bytes are compared directly: joining the decimal byte values
        // into one number is ambiguous (0x07 0xAD gives "7" . "173" = 7173,
        // the same as "GI" = "71" . "73") and caused false GIF detections.
        static $SIGNATURES = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xff\xd8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        if ($str === '') {
            return $fallback;
        }

        $signature = self::substr_in_byte($str, 0, 2);
        if ($signature === false || self::strlen_in_byte($signature) !== 2) {
            return $fallback;
        }

        if (!isset($SIGNATURES[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $SIGNATURES[$signature][0],
            'mime' => $SIGNATURES[$signature][1],
            'type' => 'binary',
        ];
    }
1994
1995
    /**
     * Build a string of randomly picked characters.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                the random string, or an empty string if $possibleChars has no characters
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        $poolSize = self::strlen($possibleChars, $encoding);
        if ($poolSize === 0) {
            return '';
        }

        $result = '';
        for ($picked = 0; $picked < $length; ++$picked) {
            try {
                $position = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                // random_int() failed, fall back to mt_rand()
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $poolSize - 1);
            }

            // pick one character at the random position
            $result .= self::substr($possibleChars, $position, 1, $encoding);
        }

        return $result;
    }
2028
2029
    /**
     * Build a unique identifier from a random number, the session id, the
     * remote / server address (when present in $_SERVER), the given extra
     * entropy and "uniqid()".
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        try {
            $randInt = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // same fallback as in "UTF8::get_random_string()": random_int()
            // throws if no source of randomness is available
            /** @noinspection RandomApiMigrationInspection */
            $randInt = \mt_rand(0, \mt_getrandmax());
        }

        $uniqueHelper = $randInt .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        $uniqueString = \uniqid($uniqueHelper, true);

        if ($md5) {
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2051
2052
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @see        UTF8::string_has_bom()
     *
     * @param string $str
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2067
2068
    /**
     * Tells whether the string contains at least one lower case char.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool true if a lower case character was found, false otherwise
     */
    public static function has_lowercase(string $str): bool
    {
        $lowercasePattern = '.*[[:lower:]]';

        return self::str_matches_pattern($str, $lowercasePattern);
    }
2079
2080
    /**
     * Tells whether the string contains at least one upper case char.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool true if an upper case character was found, false otherwise
     */
    public static function has_uppercase(string $str): bool
    {
        $uppercasePattern = '.*[[:upper:]]';

        return self::str_matches_pattern($str, $uppercasePattern);
    }
2091
2092
    /**
     * Converts a hexadecimal value into an UTF-8 character.
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // hex-string => decimal code point => character
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2103
2104
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * Accepted forms (case-insensitive): "U+xxxx", "\uxxxx" or the bare digits,
     * with 4 to 6 hexadecimal digits.
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hex digits are accepted: with the former "[a-z0-9]" class an
        // input like "zzzz" matched and was silently converted to 0.
        if (\preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2128
2129
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @see UTF8::html_entity_decode()
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2144
2145
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // encode from 0x00 on, or from 0x80 on if the ASCII chars should be kept
            $startCode = 0x00;
            if ($keepAsciiChars === true) {
                $startCode = 0x80;
            }

            // A convmap consists of groups of exactly four values
            // (start, end, offset, mask). The former fifth element was not part
            // of any group and is rejected by newer PHP versions.
            return \mb_encode_numericentity(
                $str,
                [$startCode, 0xfffff, 0, 0xfffff],
                $encoding
            );
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function ($chr) use ($keepAsciiChars, $encoding) {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::split($str)
            )
        );
    }
2198
2199
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so we are left with our own little solution here. Bummer.
     *
     * Convert all HTML entities to their applicable characters
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>
     *                         A bitmask of one or more of the following flags, which specify how to handle
     *                         quotes and which document type to use. If null is given, this method uses
     *                         ENT_QUOTES | ENT_HTML5.
     *                         <ul>
     *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                         </ul>
     *                         </p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if (!isset($str[3])) { // examples: &; || &x;
            return $str;
        }

        // nothing to decode without an "&" or without any "&#" / ";"
        if (
            \strpos($str, '&') === false
            ||
            (
                \strpos($str, '&#') === false
                &&
                \strpos($str, ';') === false
            )
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // The support-"array" must be filled before it is read for the first
        // time (previously this check ran only after the warning-condition below).
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // repeat until a full pass does not change the string anymore
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                // A convmap consists of groups of exactly four values
                // (start, end, offset, mask). The former fifth element was not
                // part of any group and is rejected by newer PHP versions.
                $str = \mb_decode_numericentity(
                    $str,
                    [0x80, 0xfffff, 0, 0xfffff],
                    $encoding
                );
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    static function ($matches) use ($encoding) {
                        // always fallback via symfony polyfill
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                        // quotes are left encoded here, "$flags" decides about them below
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            // decode numeric & UTF16 two byte entities
            // (numeric entities without a trailing ";" get one appended first)
            $str = \html_entity_decode(
                (string) \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
                $flags,
                $encoding
            );
        } while ($str_compare !== $str);

        return $str;
    }
2351
2352
    /**
     * Escape a string for html output via "UTF8::htmlspecialchars()",
     * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2368
2369
    /**
     * Strip html-tags that have no content (or only whitespace) between
     * the opening and the closing tag.
     *
     * e.g.: <tag></tag>
     *
     * @param string $str
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // an opening tag, optional whitespace only, then a closing tag
        $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        // preg_replace() returns null on error, the cast turns that into ''
        return (string) $stripped;
    }
2386
2387
    /**
2388
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2389
     *
2390
     * @see http://php.net/manual/en/function.htmlentities.php
2391
     *
2392
     * @param string $str           <p>
2393
     *                              The input string.
2394
     *                              </p>
2395
     * @param int    $flags         [optional] <p>
2396
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2397
     *                              invalid code unit sequences and the used document type. The default is
2398
     *                              ENT_COMPAT | ENT_HTML401.
2399
     *                              <table>
2400
     *                              Available <i>flags</i> constants
2401
     *                              <tr valign="top">
2402
     *                              <td>Constant Name</td>
2403
     *                              <td>Description</td>
2404
     *                              </tr>
2405
     *                              <tr valign="top">
2406
     *                              <td><b>ENT_COMPAT</b></td>
2407
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2408
     *                              </tr>
2409
     *                              <tr valign="top">
2410
     *                              <td><b>ENT_QUOTES</b></td>
2411
     *                              <td>Will convert both double and single quotes.</td>
2412
     *                              </tr>
2413
     *                              <tr valign="top">
2414
     *                              <td><b>ENT_NOQUOTES</b></td>
2415
     *                              <td>Will leave both double and single quotes unconverted.</td>
2416
     *                              </tr>
2417
     *                              <tr valign="top">
2418
     *                              <td><b>ENT_IGNORE</b></td>
2419
     *                              <td>
2420
     *                              Silently discard invalid code unit sequences instead of returning
2421
     *                              an empty string. Using this flag is discouraged as it
2422
     *                              may have security implications.
2423
     *                              </td>
2424
     *                              </tr>
2425
     *                              <tr valign="top">
2426
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2427
     *                              <td>
2428
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2429
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2430
     *                              </td>
2431
     *                              </tr>
2432
     *                              <tr valign="top">
2433
     *                              <td><b>ENT_DISALLOWED</b></td>
2434
     *                              <td>
2435
     *                              Replace invalid code points for the given document type with a
2436
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2437
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2438
     *                              instance, to ensure the well-formedness of XML documents with
2439
     *                              embedded external content.
2440
     *                              </td>
2441
     *                              </tr>
2442
     *                              <tr valign="top">
2443
     *                              <td><b>ENT_HTML401</b></td>
2444
     *                              <td>
2445
     *                              Handle code as HTML 4.01.
2446
     *                              </td>
2447
     *                              </tr>
2448
     *                              <tr valign="top">
2449
     *                              <td><b>ENT_XML1</b></td>
2450
     *                              <td>
2451
     *                              Handle code as XML 1.
2452
     *                              </td>
2453
     *                              </tr>
2454
     *                              <tr valign="top">
2455
     *                              <td><b>ENT_XHTML</b></td>
2456
     *                              <td>
2457
     *                              Handle code as XHTML.
2458
     *                              </td>
2459
     *                              </tr>
2460
     *                              <tr valign="top">
2461
     *                              <td><b>ENT_HTML5</b></td>
2462
     *                              <td>
2463
     *                              Handle code as HTML 5.
2464
     *                              </td>
2465
     *                              </tr>
2466
     *                              </table>
2467
     *                              </p>
2468
     * @param string $encoding      [optional] <p>
2469
     *                              Like <b>htmlspecialchars</b>,
2470
     *                              <b>htmlentities</b> takes an optional third argument
2471
     *                              <i>encoding</i> which defines encoding used in
2472
     *                              conversion.
2473
     *                              Although this argument is technically optional, you are highly
2474
     *                              encouraged to specify the correct value for your code.
2475
     *                              </p>
2476
     * @param bool   $double_encode [optional] <p>
2477
     *                              When <i>double_encode</i> is turned off PHP will not
2478
     *                              encode existing html entities. The default is to convert everything.
2479
     *                              </p>
2480
     *
2481
     * @return string
2482
     *                <p>
2483
     *                The encoded string.
2484
     *                <br><br>
2485
     *                If the input <i>string</i> contains an invalid code unit
2486
     *                sequence within the given <i>encoding</i> an empty string
2487
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2488
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2489
     *                </p>
2490
     */
2491 9
    public static function htmlentities(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        // 'UTF-8' and 'CP850' are used as given; every other name is normalized first.
        $encodingIsAccepted = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$encodingIsAccepted) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * The native htmlentities() leaves backslashes untouched, so they are
         * replaced by their numeric html entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
2511
2512
    /**
2513
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2514
     *
2515
     * INFO: Take a look at "UTF8::htmlentities()"
2516
     *
2517
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2518
     *
2519
     * @param string $str           <p>
2520
     *                              The string being converted.
2521
     *                              </p>
2522
     * @param int    $flags         [optional] <p>
2523
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2524
     *                              invalid code unit sequences and the used document type. The default is
2525
     *                              ENT_COMPAT | ENT_HTML401.
2526
     *                              <table>
2527
     *                              Available <i>flags</i> constants
2528
     *                              <tr valign="top">
2529
     *                              <td>Constant Name</td>
2530
     *                              <td>Description</td>
2531
     *                              </tr>
2532
     *                              <tr valign="top">
2533
     *                              <td><b>ENT_COMPAT</b></td>
2534
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2535
     *                              </tr>
2536
     *                              <tr valign="top">
2537
     *                              <td><b>ENT_QUOTES</b></td>
2538
     *                              <td>Will convert both double and single quotes.</td>
2539
     *                              </tr>
2540
     *                              <tr valign="top">
2541
     *                              <td><b>ENT_NOQUOTES</b></td>
2542
     *                              <td>Will leave both double and single quotes unconverted.</td>
2543
     *                              </tr>
2544
     *                              <tr valign="top">
2545
     *                              <td><b>ENT_IGNORE</b></td>
2546
     *                              <td>
2547
     *                              Silently discard invalid code unit sequences instead of returning
2548
     *                              an empty string. Using this flag is discouraged as it
2549
     *                              may have security implications.
2550
     *                              </td>
2551
     *                              </tr>
2552
     *                              <tr valign="top">
2553
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2554
     *                              <td>
2555
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2556
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2557
     *                              </td>
2558
     *                              </tr>
2559
     *                              <tr valign="top">
2560
     *                              <td><b>ENT_DISALLOWED</b></td>
2561
     *                              <td>
2562
     *                              Replace invalid code points for the given document type with a
2563
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2564
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2565
     *                              instance, to ensure the well-formedness of XML documents with
2566
     *                              embedded external content.
2567
     *                              </td>
2568
     *                              </tr>
2569
     *                              <tr valign="top">
2570
     *                              <td><b>ENT_HTML401</b></td>
2571
     *                              <td>
2572
     *                              Handle code as HTML 4.01.
2573
     *                              </td>
2574
     *                              </tr>
2575
     *                              <tr valign="top">
2576
     *                              <td><b>ENT_XML1</b></td>
2577
     *                              <td>
2578
     *                              Handle code as XML 1.
2579
     *                              </td>
2580
     *                              </tr>
2581
     *                              <tr valign="top">
2582
     *                              <td><b>ENT_XHTML</b></td>
2583
     *                              <td>
2584
     *                              Handle code as XHTML.
2585
     *                              </td>
2586
     *                              </tr>
2587
     *                              <tr valign="top">
2588
     *                              <td><b>ENT_HTML5</b></td>
2589
     *                              <td>
2590
     *                              Handle code as HTML 5.
2591
     *                              </td>
2592
     *                              </tr>
2593
     *                              </table>
2594
     *                              </p>
2595
     * @param string $encoding      [optional] <p>
2596
     *                              Defines encoding used in conversion.
2597
     *                              </p>
2598
     *                              <p>
2599
     *                              For the purposes of this function, the encodings
2600
     *                              ISO-8859-1, ISO-8859-15,
2601
     *                              UTF-8, cp866,
2602
     *                              cp1251, cp1252, and
2603
     *                              KOI8-R are effectively equivalent, provided the
2604
     *                              <i>string</i> itself is valid for the encoding, as
2605
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2606
     *                              the same positions in all of these encodings.
2607
     *                              </p>
2608
     * @param bool   $double_encode [optional] <p>
2609
     *                              When <i>double_encode</i> is turned off PHP will not
2610
     *                              encode existing html entities, the default is to convert everything.
2611
     *                              </p>
2612
     *
2613
     * @return string the converted string.
2614
     *                </p>
2615
     *                <p>
2616
     *                If the input <i>string</i> contains an invalid code unit
2617
     *                sequence within the given <i>encoding</i> an empty string
2618
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2619
     *                <b>ENT_SUBSTITUTE</b> flags are set
2620
     */
2621 8
    public static function htmlspecialchars(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        // 'UTF-8' and 'CP850' are used as given; every other name is normalized first.
        $needsNormalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2629
2630
    /**
     * Reports whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2640
2641
    /**
     * Alias that forwards to "UTF8::decimal_to_chr()".
     *
     * @see UTF8::decimal_to_chr()
     *
     * @param mixed $int <p>Passed through unchanged.</p>
     *
     * @return string
     *                The result of "UTF8::decimal_to_chr()"
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2654
2655
    /**
     * Converts an integer to its hexadecimal code point representation, e.g. "U+0041".
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to a hexadecimal code point.</p>
     * @param string $pfix [optional] <p>Text placed in front of the hex digits, "U+" by default.</p>
     *
     * @return string
     *                The prefix followed by the dechex() digits, left-padded with zeros to at least four digits
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Short values are zero-padded on the left; longer ones are kept as they are.
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $digits;
    }
2673
2674
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $classIsAvailable = \class_exists('IntlChar');

        return $classIsAvailable;
    }
2684
2685
    /**
     * Reports whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2695
2696
    /**
     * Deprecated alias that forwards to "UTF8::is_ascii()".
     *
     * @see        UTF8::is_ascii()
     *
     * @param string $str <p>Passed through unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_ascii()"
     *
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $result = self::is_ascii($str);

        return $result;
    }
2711
2712
    /**
     * Deprecated alias that forwards to "UTF8::is_base64()".
     *
     * @see        UTF8::is_base64()
     *
     * @param string $str <p>Passed through unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_base64()"
     *
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $result = self::is_base64($str);

        return $result;
    }
2727
2728
    /**
     * Deprecated alias that forwards to "UTF8::is_binary()".
     *
     * @see        UTF8::is_binary()
     *
     * @param mixed $str    <p>Passed through unchanged.</p>
     * @param bool  $strict <p>Passed through unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_binary()"
     *
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $result = self::is_binary($str, $strict);

        return $result;
    }
2744
2745
    /**
     * Deprecated alias that forwards to "UTF8::is_bom()".
     *
     * @see        UTF8::is_bom()
     *
     * @param string $utf8_chr <p>Passed through unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_bom()"
     *
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $result = self::is_bom($utf8_chr);

        return $result;
    }
2760
2761
    /**
     * Deprecated alias that forwards to "UTF8::is_html()".
     *
     * @see        UTF8::is_html()
     *
     * @param string $str <p>Passed through unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_html()"
     *
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $result = self::is_html($str);

        return $result;
    }
2776
2777
    /**
     * Deprecated alias that forwards to "UTF8::is_json()".
     *
     * @see        UTF8::is_json()
     *
     * @param string $str <p>Passed through unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_json()"
     *
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $result = self::is_json($str);

        return $result;
    }
2792
2793
    /**
     * Deprecated alias that forwards to "UTF8::is_utf16()".
     *
     * @see        UTF8::is_utf16()
     *
     * @param mixed $str <p>Passed through unchanged.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
2811
2812
    /**
     * Deprecated alias that forwards to "UTF8::is_utf32()".
     *
     * @see        UTF8::is_utf32()
     *
     * @param mixed $str <p>Passed through unchanged.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
2830
2831
    /**
     * Deprecated alias that forwards to "UTF8::is_utf8()".
     *
     * @see        UTF8::is_utf8()
     *
     * @param string $str    <p>Passed through unchanged.</p>
     * @param bool   $strict <p>Passed through unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_utf8()"
     *
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $result = self::is_utf8($str, $strict);

        return $result;
    }
2847
2848
    /**
     * Tests the string against the POSIX character class [[:alpha:]] via "UTF8::str_matches_pattern()".
     *
     * The pattern is anchored at both ends and uses "*", so the pattern itself also permits an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $alphaOnlyPattern = '^[[:alpha:]]*$';

        return self::str_matches_pattern($str, $alphaOnlyPattern);
    }
2860
2861
    /**
     * Tests the string against the POSIX character class [[:alnum:]] via "UTF8::str_matches_pattern()".
     *
     * The pattern is anchored at both ends and uses "*", so the pattern itself also permits an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $alnumOnlyPattern = '^[[:alnum:]]*$';

        return self::str_matches_pattern($str, $alnumOnlyPattern);
    }
2873
2874
    /**
     * Checks if a string consists only of the accepted ASCII bytes.
     *
     * Accepted are the printable range 0x20-0x7E plus the bytes 0x09, 0x0A, 0x0D, 0x10 and 0x13;
     * every other byte (including NUL and anything above 0x7E) makes the check fail.
     *
     * NOTE(review): 0x10 and 0x13 look like decimal 10 / 13 (LF / CR) written as hex - confirm
     * whether they are intended before changing the pattern.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII (or empty)<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        if ($str !== '') {
            // The negated class matches the first byte that is NOT accepted.
            $foundForeignByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

            return !$foundForeignByte;
        }

        return true;
    }
2891
2892
    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * The input must decode in strict mode and re-encode to exactly the same string.
     *
     * @param string $str <p>The input string (any non-string value yields false).</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str): bool
    {
        // The parameter is untyped on purpose: non-strings and the empty string are never base64.
        if (!\is_string($str) || $str === '') {
            return false;
        }

        // Strict mode: base64_decode() returns false for characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Compare against false explicitly (above) instead of testing truthiness:
        // a decoded payload of "0" (encoded as "MA==") is falsy but perfectly valid.
        return \base64_encode($decoded) === $str;
    }
2913
2914
    /**
     * Heuristically checks whether the input is binary.
     *
     * The checks run in this order and the first hit wins:
     * an empty string is not binary; a string of only "0" / "1" chars is binary;
     * "UTF8::get_file_type()" reporting the type "binary"; more than 25.6 % of the bytes being NUL;
     * and - in strict mode only - fileinfo reporting the MIME encoding "binary".
     *
     * @param mixed $input  <p>The input, cast to string before checking.</p>
     * @param bool  $strict <p>Also ask ext-fileinfo for the MIME encoding.</p>
     *
     * @throws \RuntimeException if $strict is used while fileinfo support is flagged as missing
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // Only "0" and "1" chars: treated as binary notation.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        $byteLength = self::strlen_in_byte($input);
        if ($byteLength) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            // Share of NUL bytes in the whole input.
            $nullByteCount = self::substr_count_in_byte($input, "\x0", 0, $byteLength);
            $nullByteRatio = $nullByteCount / $byteLength;
            if ($nullByteRatio > 0.256) {
                return true;
            }
        }

        if (!$strict) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mimeEncoding === 'binary';
    }
2967
2968
    /**
     * Check if the file is binary.
     *
     * Reads up to the first 512 bytes of the file and hands them to "UTF8::is_binary()" in strict mode.
     * A file that cannot be opened, or whose first read is empty, is reported as not binary.
     *
     * @param string $file <p>Path handed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
2992
2993
    /**
     * Tests the string against the POSIX character class [[:space:]] via "UTF8::str_matches_pattern()".
     *
     * The pattern is anchored at both ends and uses "*", so the pattern itself also permits an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $whitespaceOnlyPattern = '^[[:space:]]*$';

        return self::str_matches_pattern($str, $whitespaceOnlyPattern);
    }
3005
3006
    /**
     * Checks if the given string is exactly equal to one of the known "Byte Order Mark" strings.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // The BOM strings are the keys of the table; the byte lengths stored as values are not needed here.
        $knownBoms = \array_keys(self::$BOM);

        return \in_array($str, $knownBoms, true);
    }
3026
3027
    /**
     * Determine whether the value is considered to be empty.
     *
     * A value is considered empty if it is loosely equal to FALSE,
     * e.g. null, '', '0', 0, 0.0, false or an empty array.
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty
     */
    public static function is_empty($str): bool
    {
        // For an always-defined parameter, empty($str) is the same as negating its truthiness.
        return !$str;
    }
3041
3042
    /**
     * Tests the string against the POSIX character class [[:xdigit:]] via "UTF8::str_matches_pattern()".
     *
     * The pattern is anchored at both ends and uses "*", so the pattern itself also permits an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $hexOnlyPattern = '^[[:xdigit:]]*$';

        return self::str_matches_pattern($str, $hexOnlyPattern);
    }
3054
3055
    /**
     * Check if the string contains at least one html-tag (opening, closing or self-closing).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if a tag was found, <strong>false</strong> otherwise (also for an empty string)
     */
    public static function is_html(string $str): bool
    {
        // Tag name, optional attributes (bare, quoted or unquoted values), optional "/" before ">".
        return $str !== ''
               &&
               \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str) === 1;
    }
3075
3076
    /**
     * Try to check if "$str" is a json-string.
     *
     * Only input that "UTF8::json_decode()" turns into an object or an array, without a json error, counts.
     *
     * @param string $str <p>The input string.</p>
     *
     * @throws \RuntimeException if json support is flagged as missing
     *
     * @return bool
     */
    public static function is_json(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // Anything that is neither an object nor an array is rejected.
        if (!\is_object($decoded) && !\is_array($decoded)) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3108
3109
    /**
     * Tests the string against the POSIX character class [[:lower:]] via "UTF8::str_matches_pattern()".
     *
     * The pattern is anchored at both ends and uses "*", so the pattern itself also permits an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $lowerOnlyPattern = '^[[:lower:]]*$';

        return self::str_matches_pattern($str, $lowerOnlyPattern);
    }
3118
3119
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by actually running unserialize() on the input. Object instantiation
     * is switched off for that probe, so no class code of a serialized object is run.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // Serialized "false": unserialize() returns false for it, which would look like a failure below.
        if ($str === 'b:0;') {
            return true;
        }

        // "allowed_classes => false" turns every serialized object into __PHP_Incomplete_Class,
        // which still counts as "could be unserialized" but never instantiates user classes.
        // The silence operator is deliberate: unserialize() emits a diagnostic for every non-serialized input.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\unserialize($str, ['allowed_classes' => false]);

        return $probe !== false;
    }
3138
3139
    /**
     * Tests the string against the POSIX character class [[:upper:]] via "UTF8::str_matches_pattern()".
     *
     * The pattern is anchored at both ends and uses "*", so the pattern itself also permits an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $upperOnlyPattern = '^[[:upper:]]*$';

        return self::str_matches_pattern($str, $upperOnlyPattern);
    }
3152
3153
    /**
     * Check if the string is UTF-16.
     *
     * The input (without BOM) is converted UTF-16 -> UTF-8 -> UTF-16 -> UTF-8 once per byte order.
     * A byte order only scores when that round trip is stable; the order with the higher score wins
     * and equal scores mean "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If true, input that "UTF8::is_binary()" (strict) rejects is
     *                                     reported as not UTF-16 right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        // The support table is read below. is_binary() may have been skipped, or may have returned
        // before initialising it, so make sure it is populated - same guard as is_binary() / is_json().
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score per byte order, evaluated little endian first, then big endian.
        $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (['UTF-16LE', 'UTF-16BE'] as $byteOrder) {
            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
            if (!$asUtf8) {
                continue;
            }

            $backToUtf16 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
            $roundTrip = \mb_convert_encoding($backToUtf16, 'UTF-8', $byteOrder);
            if ($roundTrip !== $asUtf8) {
                continue;
            }

            // Computed lazily and at most once for both byte orders.
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // NOTE(review): this looks up the *keys* of one count_chars() result among the *values*
            // of another - kept exactly as it was; confirm against the return shape of count_chars().
            foreach (self::count_chars($roundTrip, true) as $roundTripChar => $unused) {
                if (\in_array($roundTripChar, $strChars, true) === true) {
                    ++$score[$byteOrder];
                }
            }
        }

        if ($score['UTF-16LE'] === $score['UTF-16BE']) {
            return false;
        }

        return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
    }
3228
3229
    /**
     * Detect whether the input looks like UTF-32 and, if so, in which byte order.
     *
     * The input is converted from UTF-32LE and from UTF-32BE to UTF-8. Every conversion
     * that survives a round trip is scored and the byte order with the higher score wins.
     *
     * @param mixed $str                   <p>The input string (it is cast to string).</p>
     * @param bool  $checkIfStringIsBinary <p>If true, input that self::is_binary() does not report as binary
     *                                     is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        $str = (string) $str;

        if ($checkIfStringIsBinary === true && self::is_binary($str, true) === false) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // filled lazily with self::count_chars($str, true) and shared by both byte-order checks
        $charsOfInput = [];

        $scores = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $byteOrder) {
            $score = 0;

            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
            if ($asUtf8) {
                $backToUtf32 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
                $roundTrip = \mb_convert_encoding($backToUtf32, 'UTF-8', $byteOrder);

                // only a conversion that is stable over a round trip is scored
                if ($roundTrip === $asUtf8) {
                    if (\count($charsOfInput) === 0) {
                        $charsOfInput = self::count_chars($str, true);
                    }

                    foreach (self::count_chars($roundTrip, true) as $char => $unused) {
                        if (\in_array($char, $charsOfInput, true) === true) {
                            ++$score;
                        }
                    }
                }
            }

            $scores[$byteOrder] = $score;
        }

        // a tie (including "both zero") means: no decision possible
        if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
            return false;
        }

        return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
    }
3304
3305
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * If PCRE was built with UTF-8 support, the check is delegated to a "/u" match. Otherwise
     * the bytes are validated one by one by a small state machine.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked; for an array every element has to be valid.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // both validators below work on bytes, so the input is checked via its string form
        $str = (string) $str;
        if ($str === '') {
            return true;
        }

        // NOTE: the original guard was inverted ("!== true"), so the "/u" shortcut was only
        // taken on systems where the "/u" modifier is not usable at all.
        if (self::pcre_utf8_support() === true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // Unicode code point that is currently assembled
        $mBytes = 1; // expected number of octets in the current sequence

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = self::strlen_in_byte($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or
                // the first octet of a multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // When mState is non-zero, we expect a continuation of the multi-octet sequence.
            if ((0xC0 & $in) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            // Legal continuation: merge its 6 payload bits into the code point.
            $shift = ($mState - 1) * 6;
            $mUcs4 |= ($in & 0x0000003F) << $shift;

            if (--$mState === 0) {
                // End of the multi-octet sequence, mUcs4 now contains the final code point.
                // Check for illegal sequences and code points.
                if (
                    // From Unicode 3.1, non-shortest form is illegal
                    ($mBytes === 2 && $mUcs4 < 0x0080)
                    ||
                    ($mBytes === 3 && $mUcs4 < 0x0800)
                    ||
                    ($mBytes === 4 && $mUcs4 < 0x10000)
                    ||
                    ($mBytes > 4)
                    ||
                    // From Unicode 3.2, surrogate characters are illegal.
                    (($mUcs4 & 0xFFFFF800) === 0xD800)
                    ||
                    // Code points outside the Unicode range are illegal.
                    ($mUcs4 > 0x10FFFF)
                ) {
                    return false;
                }

                // reset the state for the next character
                $mUcs4 = 0;
                $mBytes = 1;
            }
        }

        // A non-zero state means the input ended in the middle of a multi-octet
        // sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
        return $mState === 0;
    }
3464
3465
    /**
3466
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3467
     * Decodes a JSON string
3468
     *
3469
     * @see http://php.net/manual/en/function.json-decode.php
3470
     *
3471
     * @param string $json    <p>
3472
     *                        The <i>json</i> string being decoded.
3473
     *                        </p>
3474
     *                        <p>
3475
     *                        This function only works with UTF-8 encoded strings.
3476
     *                        </p>
3477
     *                        <p>PHP implements a superset of
3478
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3479
     *                        only supports these values when they are nested inside an array or an object.
3480
     *                        </p>
3481
     * @param bool   $assoc   [optional] <p>
3482
     *                        When <b>TRUE</b>, returned objects will be converted into
3483
     *                        associative arrays.
3484
     *                        </p>
3485
     * @param int    $depth   [optional] <p>
3486
     *                        User specified recursion depth.
3487
     *                        </p>
3488
     * @param int    $options [optional] <p>
3489
     *                        Bitmask of JSON decode options. Currently only
3490
     *                        <b>JSON_BIGINT_AS_STRING</b>
3491
     *                        is supported (default is to cast large integers as floats)
3492
     *                        </p>
3493
     *
3494
     * @return mixed
3495
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
3496
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
3497
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
3498
     *               is deeper than the recursion limit.
3499
     */
3500 24
    public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
3501
    {
3502 24
        $json = self::filter($json);
3503
3504 24
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3505
            self::checkForSupport();
3506
        }
3507
3508 24
        if (self::$SUPPORT['json'] === false) {
3509
            throw new \RuntimeException('ext-json: is not installed');
3510
        }
3511
3512
        /** @noinspection PhpComposerExtensionStubsInspection */
3513 24
        return \json_decode($json, $assoc, $depth, $options);
3514
    }
3515
3516
    /**
     * Returns the JSON representation of a value, after the value was passed through self::filter().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if self::$SUPPORT['json'] is false (ext-json is missing)
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filteredValue, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3569
3570
    /**
     * Tells whether JSON support is available, i.e. whether the function "json_decode" exists.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $jsonDecodeExists = \function_exists('json_decode');

        return $jsonDecodeExists;
    }
3580
3581
    /**
     * Lowercases the first character of the string and leaves the rest untouched.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // everything after the first character; self::substr() may hand back false
        $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

        $firstChar = (string) self::substr($str, 0, 1, $encoding, $cleanUtf8);
        $loweredFirstChar = self::strtolower($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $loweredFirstChar . ($remainder === false ? '' : $remainder);
    }
3609
3610
    /**
     * Alias of "UTF8::lcfirst()": every argument is forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCasedFirst = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowerCasedFirst;
    }
3632
3633
    /**
     * Lowercases the first character of every word in the string.
     *
     * The string is split via self::str_to_words(), every non-empty part that is not listed
     * in $exceptions is passed through self::lcfirst() and the parts are glued together again.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that are left untouched (compared strictly).</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not start
     *                                           a new word (handed over to self::str_to_words()).</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // compare against '' explicitly: a plain truthiness check would also discard the string "0"
        if ($str === '') {
            return '';
        }

        $useExceptions = \count($exceptions) > 0;

        $newWords = [];
        foreach (self::str_to_words($str, $charlist) as $word) {
            // skip empty parts, but keep the part "0" (it is falsy, yet it is real content)
            if (!$word && $word !== '0') {
                continue;
            }

            if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }

            $newWords[] = $word;
        }

        return \implode('', $newWords);
    }
3687
3688
    /**
     * Alias of "UTF8::lcfirst()": every argument is forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCasedFirst = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowerCasedFirst;
    }
3710
3711
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * Without $chars, a leading run of characters from the Unicode categories "Z" (separators)
     * and "C" (control / other) is removed; otherwise a leading run of the given characters.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars <p>Optional characters to be stripped; \INF or an empty value selects the default.
     *                      The string "0" is a regular character list (strip leading zeros).</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is falsy in PHP, but it is a perfectly valid list of characters to strip,
        // so it must not fall back to the default pattern.
        $useDefaultPattern = $chars === \INF || (!$chars && $chars !== '0');

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultPattern) {
            $pattern = '^[\pZ\pC]+';
        } else {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3735
3736
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings
     *                                  (an array is concatenated first).</p>
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codePointList = self::codepoints($haystack, false);

        return \count($codePointList) === 0
            ? null
            : self::chr(\max($codePointList));
    }
3758
3759
    /**
     * Returns the largest entry of self::chr_size_list() for the given string, i.e. the
     * maximum number of bytes taken by any UTF-8 encoded character in it.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 if the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizeList = self::chr_size_list($str);

        return \count($sizeList) > 0 ? (int) \max($sizeList) : 0;
    }
3776
3777
    /**
     * Checks whether the "mbstring" extension is loaded.
     *
     * Side effect: if it is loaded, the internal mbstring encoding is set to "UTF-8".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        if (\extension_loaded('mbstring') === false) {
            return false;
        }

        \mb_internal_encoding('UTF-8');

        return true;
    }
3792
3793
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings
     *                   (an array is concatenated first).</strong>
     *
     * @return string|null the character with the lowest code point, or null if no code points were found
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codePointList = self::codepoints($haystack, false);

        return \count($codePointList) === 0
            ? null
            : self::chr(\min($codePointList));
    }
3815
3816
    /**
     * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
3832
3833
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: a few hard-wired names, the per-request cache, the list of known encodings
     * and finally a table of common aliases (matched on the uppercased name without punctuation).
     * A name that matches nothing is returned uppercased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // '', '0' and '1' are what empty / boolean-like input turns into
        // (only a fallback, for non "strict_types" usage ...)
        if ($encoding === '' || $encoding === '0' || $encoding === '1') {
            return $fallback;
        }

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known encoding name: keep it as it is
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $requestedName = $encoding;
        $encoding = \strtoupper($encoding);
        // alias key: everything except letters, digits and whitespace is removed
        $aliasKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

        $equivalences = [
            // ISO-8859-1: Western European
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1',
            // ISO-8859-2: Central European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2',
            // ISO-8859-3: Southern European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3',
            // ISO-8859-4: Northern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4',
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            // ISO-8859-9: Turkish
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9',
            // ISO-8859-11: Thai
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11',
            // ISO-8859-10: Nordic
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10',
            // ISO-8859-13: Baltic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13',
            // ISO-8859-14: Celtic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14',
            // ISO-8859-15: Western European (with some extra chars e.g. €)
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15',
            // ISO-8859-16: Southeast European
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16',
            // Windows code pages
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            // Unicode transformation formats
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            // raw bytes
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown aliases keep the uppercased name
        $encoding = $equivalences[$aliasKey] ?? $encoding;

        // the cache is keyed by the name as it was requested, not by the uppercased one
        $STATIC_NORMALIZE_ENCODING_CACHE[$requestedName] = $encoding;

        return $encoding;
    }
3974
3975
    /**
     * Standardize line endings to unix-like: every "\r\n" and every remaining "\r" becomes "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // strtr() tries the longest key first, so "\r\n" is replaced as one unit
        $unixLineEndings = [
            "\r\n" => "\n",
            "\r"   => "\n",
        ];

        return \strtr($str, $unixLineEndings);
    }
3986
3987
    /**
     * Normalize some MS Word special characters.
     *
     * Every key of the "utf8_msword" data table that occurs in the string is replaced by the
     * value stored for it. The table is loaded once and its keys / values are cached.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        static $UTF8_MSWORD_KEYS_CACHE = null;
        static $UTF8_MSWORD_VALUES_CACHE = null;

        if ($str === '') {
            return '';
        }

        // first call with a non-empty string: build the search / replace lists
        if ($UTF8_MSWORD_KEYS_CACHE === null) {
            self::$UTF8_MSWORD = self::$UTF8_MSWORD ?? self::getData('utf8_msword');

            $UTF8_MSWORD_KEYS_CACHE = \array_keys(self::$UTF8_MSWORD);
            $UTF8_MSWORD_VALUES_CACHE = \array_values(self::$UTF8_MSWORD);
        }

        return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
    }
4015
    /**
     * Normalize the whitespace.
     *
     * Every entry of the internal whitespace table is replaced by a plain space and,
     * unless requested otherwise, every entry of the bidirectional control table is removed.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        static $WHITESPACE_LISTS = [];
        static $BIDI_CONTROLS = null;

        if ($str === '') {
            return '';
        }

        // one cached search list per "keep non-breaking space" flavour (0 / 1)
        $listKey = (int) $keepNonBreakingSpace;
        if (!isset($WHITESPACE_LISTS[$listKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_LISTS[$listKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_CONTROLS === null) {
                $BIDI_CONTROLS = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($BIDI_CONTROLS, '', $str);
        }

        return \str_replace($WHITESPACE_LISTS[$listKey], ' ', $str);
    }
4056
4057
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CODE_POINT_CACHE = [];

        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache key is built from the untouched input, before any re-encoding below
        $cacheKey = $chr . $encoding;
        if (isset($CODE_POINT_CACHE[$cacheKey]) === true) {
            return $CODE_POINT_CACHE[$cacheKey];
        }

        // lazy-load the pre-calculated lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return self::$ORD[$chr];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCodePoint = \IntlChar::ord($chr);
            if ($intlCodePoint) {
                return $CODE_POINT_CACHE[$cacheKey] = $intlCodePoint;
            }
        }

        // manual decoding: look at (at most) the first four bytes, indexed from 1 by "unpack()"
        $bytes = \unpack('C*', (string) self::substr($chr, 0, 4, 'CP850'));
        $leadByte = $bytes ? $bytes[1] : 0;

        if ($leadByte >= 0xF0 && isset($bytes[4])) {
            // lead byte of a four byte sequence
            $codePoint = (int) ((($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($leadByte >= 0xE0 && isset($bytes[3])) {
            // lead byte of a three byte sequence
            $codePoint = (int) ((($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($leadByte >= 0xC0 && isset($bytes[2])) {
            // lead byte of a two byte sequence
            $codePoint = (int) ((($leadByte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            $codePoint = $leadByte;
        }

        return $CODE_POINT_CACHE[$cacheKey] = $codePoint;
    }
4134
4135
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method never sets variables in the current scope;
     *          the parsed values are only written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if (\mb_parse_str($str, $result) === false) {
                return false;
            }

            return !empty($result);
        }

        // fallback: the native function has no usable return value, so only $result counts
        /** @noinspection PhpVoidFunctionResultUsedInspection */
        \parse_str($str, $result);

        return !empty($result);
    }
4171
4172
    /**
     * Checks if the "u" pattern modifier is available, which enables Unicode support in PCRE.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // an empty UTF-8 pattern can only be compiled if PCRE knows the "u" modifier;
        // the warning of a failing compilation is silenced on purpose
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probeResult = @\preg_match('//u', '');

        return (bool) $probeResult;
    }
4184
4185
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is read as a decimal code point if it consists of digits only,
     * otherwise as a hexadecimal code point if it consists of hex digits only,
     * otherwise as a character whose code point is looked up via UTF8::ord().
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is not available
     *
     * @return string[]
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // turns one boundary into its code point
        $toCodePoint = static function ($boundary) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit((string) $boundary)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($boundary)) {
                return (int) self::hex_to_int($boundary);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            static function ($codePoint) {
                return (string) self::chr($codePoint);
            },
            \range($start, $end)
        );
    }
4240
4241
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // non-standard "%uXXXX" escapes are rewritten into hexadecimal html entities,
        // so that the entity decoding below can handle them
        $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscapePattern, $str)) {
            $str = (string) \preg_replace($unicodeEscapePattern, '&#x\\1;', \rawurldecode($str));
        }

        $entityFlags = \ENT_QUOTES | \ENT_HTML5;

        // decode at least once, and - if requested - until the string stops changing
        do {
            $previous = $str;

            $withoutEntities = self::html_entity_decode(self::to_utf8($str), $entityFlags);
            $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4288
4289
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always executed with the "u" (UTF-8) modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
    {
        // the option string "msr" is mapped to "ms" before it is handed over to PCRE
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty delimiter
        $delimiter = $delimiter ?: '/';

        $regex = $delimiter . $pattern . $delimiter . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4317
4318
    /**
     * Alias of "UTF8::remove_bom()".
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the result of "UTF8::remove_bom()"
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4333
4334
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the internal BOM table is tested against the beginning of the
     * (already shortened) string, so several stacked BOMs are removed as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = self::strlen_in_byte($str);

        foreach (self::$BOM as $bom => $bomLength) {
            if (self::strpos_in_byte($str, $bom, 0) !== 0) {
                // the string does not start with this BOM
                continue;
            }

            $stripped = self::substr_in_byte($str, $bomLength, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= (int) $bomLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4363
4364
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "    ") is collapsed into a single
     * occurrence of $what (e.g. " ").
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // anything that is neither a string nor an array leaves the input untouched
        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                if ($item === '') {
                    // an empty needle can not be duplicated
                    continue;
                }

                // The needle is returned via callback and not passed as replacement string:
                // "preg_replace()" would interpret sequences like "$0" or "\0" inside of the
                // needle as back-references and therefore would not collapse the run.
                $str = (string) \preg_replace_callback(
                    '/(?:' . \preg_quote($item, '/') . ')+/',
                    static function () use ($item) {
                        return $item;
                    },
                    $str
                );
            }
        }

        return $str;
    }
4387
4388
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which should
     *                              not be stripped. Default: '' (strip every tag)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4402
4403
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one single break, so it is replaced by exactly one $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // INFO: the pattern must not start with a literal "/" in front of "\r\n", otherwise
        // a slash before a line break is swallowed and a plain "\r\n" is matched as two breaks
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4415
4416
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * WARNING: $replacement must not contain a removable sequence itself, because the
     *          replacement is repeated until nothing is found anymore.
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (e.g. "%0B") of the characters.</p>
     * @param string $replacement [optional] <p>The string which is inserted for every removed sequence.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        if ($url_encoded) {
            // hex digits of percent-encoding are case-insensitive ("%0b" === "%0B"),
            // so both spellings must be matched
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // repeat, because a removal can build a new removable sequence (e.g. "%%0B0B")
        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4449
4450
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (!self::str_starts_with($str, $substring)) {
            // nothing to remove
            return $str;
        }

        $prefixLength = (int) self::strlen($substring, $encoding);

        return (string) self::substr($str, $prefixLength, null, $encoding);
    }
4472
4473
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (self::str_ends_with($str, $substring)) {
            // both lengths are measured in $encoding, so the cut has to be done
            // in the same encoding as well (like in "UTF8::remove_left()")
            $keepLength = (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding);

            return (string) self::substr(
                $str,
                0,
                $keepLength,
                $encoding
            );
        }

        return $str;
    }
4494
4495
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Delegates to UTF8::str_replace() or, for the case-insensitive mode, to UTF8::str_ireplace().
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4513
4514
    /**
     * Replaces all occurrences of every $search element in $str by $replacement.
     *
     * Delegates to UTF8::str_replace() or, for the case-insensitive mode, to UTF8::str_ireplace().
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4532
4533
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character.</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            // "mb_substitute_character()" only accepts a code point or a keyword like "none",
            // never the replacement string itself. So invalid bytes are either dropped ("none")
            // or first turned into U+FFFD, which is replaced by $replacementChar at the end.
            $substituteCharacter = $replacementChar === '' ? 'none' : 0xFFFD;

            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // always fallback via symfony polyfill
            $save = \mb_substitute_character();
            \mb_substitute_character($substituteCharacter);
            $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            // restore the setting of the caller
            \mb_substitute_character($save);

            $str = \is_string($strTmp) ? $strTmp : '';
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
        return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
    }
4585
4586
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * Without $chars, all trailing separator- and control-characters
     * (Unicode categories "Z" and "C") are stripped.
     *
     * @param string $str   <p>The string to be trimmed.</p>
     * @param mixed  $chars <p>Optional characters to be stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        //
        // INFO: "0" is falsy in php, but it is a valid character to strip
        if ($chars === \INF || (!$chars && $chars !== '0')) {
            $pattern = '[\pZ\pC]+\z';
        } else {
            $chars = \preg_quote((string) $chars, '/');
            // "\z" only matches at the very end of the string,
            // while "$" would also match in front of a trailing newline
            $pattern = "[{$chars}]+\\z";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4610
4611
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes one "key - value" line per entry of the internal support table, wrapped in "<pre>".
     */
    public static function showSupport()
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        echo '<pre>';

        foreach (self::$SUPPORT as $feature => $state) {
            echo $feature, ' - ', \print_r($state, true), "\n<br>";
        }

        echo '</pre>';
    }
4626
4627
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        if ($keepAsciiChars === true) {
            // ASCII input is handed back untouched in this mode
            if (self::is_ascii($char) === true) {
                return $char;
            }
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4656
4657
    /**
     * Replace every run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces which are merged into one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        $spaceRun = \str_repeat(' ', $tabLength);

        return \str_replace($spaceRun, "\t", $str);
    }
4667
4668
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is given, every element is split on its own (with the same
     * $length and $cleanUtf8 settings) and the array of results is returned.
     *
     * @param int|int[]|string|string[] $str       <p>The string to split into array.</p>
     * @param int                       $length    [optional] <p>Max character length of each array element.</p>
     * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string[] an array containing chunks of the string
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as $k => $v) {
                // hand over $cleanUtf8 too, otherwise array elements are never cleaned
                $str[$k] = self::split($v, $length, $cleanUtf8);
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        // init
        $ret = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['pcre_utf8'] === true) {
            // "." matches one complete character in UTF-8 mode ("s": line breaks included)
            \preg_match_all('/./us', $str, $retArray);
            if (isset($retArray[0])) {
                $ret = $retArray[0];
            }
            unset($retArray);
        } else {
            // fallback: walk through the bytes and group them by the bit pattern of the lead byte;
            // bytes that do not form a valid sequence are skipped

            $len = self::strlen_in_byte($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // merge the single characters into chunks of $length characters
            $ret = \array_chunk($ret, $length);

            return \array_map(
                static function ($item) {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
4790
4791
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // upper-case the character which follows a run of separators and drop the separators
        $upperAfterSeparator = static function ($match) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength) {
            if (!isset($match[1])) {
                // separators at the end of the string: nothing to capitalize
                return '';
            }

            return self::strtoupper($match[1], $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        };

        // upper-case a run of digits together with the character which follows it
        $upperAfterDigits = static function ($match) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength) {
            return self::strtoupper($match[0], $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        };

        $camelized = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // leading dashes and underscores are dropped without capitalizing anything
        $camelized = (string) \preg_replace('/^[-_]+/', '', $camelized);
        $camelized = (string) \preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $camelized);

        return (string) \preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $camelized);
    }
4829
4830
    /**
4831
     * Returns the string with the first letter of each word capitalized,
4832
     * except for when the word is a name which shouldn't be capitalized.
4833
     *
4834
     * @param string $str
4835
     *
4836
     * @return string string with $str capitalized
4837
     */
4838 1
    public static function str_capitalize_name(string $str): string
    {
        // Collapse whitespace first, then run the helper once with a space
        // as delimiter and once more with a hyphen as delimiter.
        $collapsed = self::collapse_whitespace($str);

        return self::str_capitalize_name_helper(
            self::str_capitalize_name_helper($collapsed, ' '),
            '-'
        );
    }
4846
4847
    /**
4848
     * Returns true if the string contains $needle, false otherwise. By default
4849
     * the comparison is case-sensitive, but can be made insensitive by setting
4850
     * $caseSensitive to false.
4851
     *
4852
     * @param string $haystack      <p>The input string.</p>
4853
     * @param string $needle        <p>Substring to look for.</p>
4854
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4855
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
4856
     *
4857
     * @return bool whether or not $haystack contains $needle
4858
     */
4859 106
    public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        // An empty haystack or an empty needle is never reported as a match.
        if ($haystack === '') {
            return false;
        }
        if ($needle === '') {
            return false;
        }

        // BC fallback: a non-boolean third argument is taken as the encoding.
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        $position = $caseSensitive
            ? self::strpos($haystack, $needle, 0, $encoding)
            : self::stripos($haystack, $needle, 0, $encoding);

        return $position !== false;
    }
4876
4877
    /**
4878
     * Returns true if the string contains all $needles, false otherwise. By
4879
     * default the comparison is case-sensitive, but can be made insensitive by
4880
     * setting $caseSensitive to false.
4881
     *
4882
     * @param string $haystack      <p>The input string.</p>
4883
     * @param array  $needles       <p>SubStrings to look for.</p>
4884
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4885
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
4886
     *
4887
     * @return bool whether or not $haystack contains $needle
4888
     */
4889 44
    public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        // Nothing to search in, or nothing to search for: report "not contained".
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // BC fallback: a non-boolean third argument is taken as the encoding.
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        // Fail fast on the first needle that is missing.
        foreach ($needles as $needle) {
            $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
            if (!$found) {
                return false;
            }
        }

        return true;
    }
4912
4913
    /**
4914
     * Returns true if the string contains any $needles, false otherwise. By
4915
     * default the comparison is case-sensitive, but can be made insensitive by
4916
     * setting $caseSensitive to false.
4917
     *
4918
     * @param string $haystack      <p>The input string.</p>
4919
     * @param array  $needles       <p>SubStrings to look for.</p>
4920
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4921
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
4922
     *
4923
     * @return bool
4924
     *              Whether or not $str contains $needle
4925
     */
4926 43
    public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        // Without needles there is nothing that could match.
        if ($needles === []) {
            return false;
        }

        // Succeed on the first needle that str_contains() reports as present.
        foreach ($needles as $needle) {
            $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
            if ($found) {
                return true;
            }
        }

        return false;
    }
4940
4941
    /**
4942
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
4943
     * inserted before uppercase characters (with the exception of the first
4944
     * character of the string), and in place of spaces as well as underscores.
4945
     *
4946
     * @param string $str      <p>The input string.</p>
4947
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
4948
     *
4949
     * @return string
4950
     */
4951 19
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // Dasherizing is str_delimit() with "-" as the delimiter.
        $dashed = self::str_delimit($str, '-', $encoding);

        return $dashed;
    }
4955
4956
    /**
4957
     * Returns a lowercase and trimmed string separated by the given delimiter.
4958
     * Delimiters are inserted before uppercase characters (with the exception
4959
     * of the first character of the string), and in place of spaces, dashes,
4960
     * and underscores. Alpha delimiters are not converted to lowercase.
4961
     *
4962
     * @param string      $str                   <p>The input string.</p>
4963
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
4964
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
4965
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
4966
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
4967
     *                                           tr</p>
4968
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
4969
     *                                           ß</p>
4970
     *
4971
     * @return string
4972
     */
4973 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = self::trim($str);

        // Mark every uppercase letter that is not at a word boundary with a
        // dash. \p{Lu} (instead of [A-Z]) also covers non-ASCII capitals such
        // as "É" or "Ä", which the ASCII-only class silently skipped.
        $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $str);

        $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // Collapse each run of dashes, underscores and whitespace (including
        // the dashes inserted above) into a single delimiter.
        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
4989
4990
    /**
4991
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4992
     *
4993
     * @param string $str <p>The input string.</p>
4994
     *
4995
     * @return false|string
4996
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
4997
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
4998
     */
4999 30
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) binary-looking input (UTF-16 / UTF-32 / PDF / images / ...)
        //

        if (self::is_binary($input, true) === true) {
            $utf16Names = [1 => 'UTF-16LE', 2 => 'UTF-16BE'];
            $utf16Type = self::is_utf16($input, false);
            if (\is_int($utf16Type) && isset($utf16Names[$utf16Type])) {
                return $utf16Names[$utf16Type];
            }

            // UTF-32 is only probed when the UTF-16 check did not match.
            $utf32Names = [1 => 'UTF-32LE', 2 => 'UTF-32BE'];
            $utf32Type = self::is_utf32($input, false);
            if (\is_int($utf32Type) && isset($utf32Names[$utf32Type])) {
                return $utf32Names[$utf32Type];
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (self::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //

        if (self::is_utf8($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) ask "mb_detect_encoding()" (strict mode) with a fixed candidate order
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding(
                $input,
                [
                    'ISO-8859-1',
                    'ISO-8859-2',
                    'ISO-8859-3',
                    'ISO-8859-4',
                    'ISO-8859-5',
                    'ISO-8859-6',
                    'ISO-8859-7',
                    'ISO-8859-8',
                    'ISO-8859-9',
                    'ISO-8859-10',
                    'ISO-8859-13',
                    'ISO-8859-14',
                    'ISO-8859-15',
                    'ISO-8859-16',
                    'WINDOWS-1251',
                    'WINDOWS-1252',
                    'WINDOWS-1254',
                    'CP932',
                    'CP936',
                    'CP950',
                    'CP866',
                    'CP850',
                    'CP51932',
                    'CP50220',
                    'CP50221',
                    'CP50222',
                    'ISO-2022-JP',
                    'ISO-2022-KR',
                    'JIS',
                    'JIS-ms',
                    'EUC-CN',
                    'EUC-JP',
                ],
                true
            );
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) last resort: the first known encoding that survives an "iconv()" round trip unchanged
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($roundTrip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5115
5116
    /**
5117
     * Check if the string ends with the given substring.
5118
     *
5119
     * @param string $haystack <p>The string to search in.</p>
5120
     * @param string $needle   <p>The substring to search for.</p>
5121
     *
5122
     * @return bool
5123
     */
5124 40
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // Empty input on either side is never treated as a match.
        if ($haystack === '') {
            return false;
        }
        if ($needle === '') {
            return false;
        }

        // Byte-wise comparison of the haystack's tail against the needle.
        $needleLength = \strlen($needle);
        $tail = \substr($haystack, -$needleLength);

        return $tail === $needle;
    }
5132
5133
    /**
5134
     * Returns true if the string ends with any of $substrings, false otherwise.
5135
     *
5136
     * - case-sensitive
5137
     *
5138
     * @param string   $str        <p>The input string.</p>
5139
     * @param string[] $substrings <p>Substrings to look for.</p>
5140
     *
5141
     * @return bool whether or not $str ends with $substring
5142
     */
5143 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // No candidates, no match.
        if ($substrings === []) {
            return false;
        }

        // Stop at the first candidate that str_ends_with() accepts.
        foreach ($substrings as $candidate) {
            $matches = self::str_ends_with($str, $candidate);
            if ($matches) {
                return true;
            }
        }

        return false;
    }
5157
5158
    /**
5159
     * Ensures that the string begins with $substring. If it doesn't, it's
5160
     * prepended.
5161
     *
5162
     * @param string $str       <p>The input string.</p>
5163
     * @param string $substring <p>The substring to add if not present.</p>
5164
     *
5165
     * @return string
5166
     */
5167 10
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Already prefixed: hand the input back untouched.
        if (self::str_starts_with($str, $substring)) {
            return $str;
        }

        return $substring . $str;
    }
5175
5176
    /**
5177
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
5178
     *
5179
     * @param string $str       <p>The input string.</p>
5180
     * @param string $substring <p>The substring to add if not present.</p>
5181
     *
5182
     * @return string
5183
     */
5184 10
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Already suffixed: hand the input back untouched.
        if (self::str_ends_with($str, $substring)) {
            return $str;
        }

        return $str . $substring;
    }
5192
5193
    /**
5194
     * Capitalizes the first word of the string, replaces underscores with
5195
     * spaces, and strips '_id'.
5196
     *
5197
     * @param string $str
5198
     *
5199
     * @return string
5200
     */
5201 3
    public static function str_humanize($str): string
    {
        // "_id" is removed, every remaining underscore becomes a space.
        $search = ['_id', '_'];
        $replace = ['', ' '];

        $spaced = self::str_replace($search, $replace, $str);

        // Trim the result and upper-case its first character.
        return self::ucfirst(self::trim($spaced));
    }
5217
5218
    /**
5219
     * Check if the string ends with the given substring, case insensitive.
5220
     *
5221
     * @param string $haystack <p>The string to search in.</p>
5222
     * @param string $needle   <p>The substring to search for.</p>
5223
     *
5224
     * @return bool
5225
     */
5226 12
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // Empty input on either side is never treated as a match.
        if ($haystack === '') {
            return false;
        }
        if ($needle === '') {
            return false;
        }

        // Cut as many bytes off the end of the haystack as the needle has,
        // then compare both pieces case-insensitively.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5234
5235
    /**
5236
     * Returns true if the string ends with any of $substrings, false otherwise.
5237
     *
5238
     * - case-insensitive
5239
     *
5240
     * @param string   $str        <p>The input string.</p>
5241
     * @param string[] $substrings <p>Substrings to look for.</p>
5242
     *
5243
     * @return bool whether or not $str ends with $substring
5244
     */
5245 4
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // No candidates, no match.
        if ($substrings === []) {
            return false;
        }

        // Stop at the first candidate that str_iends_with() accepts.
        foreach ($substrings as $candidate) {
            $matches = self::str_iends_with($str, $candidate);
            if ($matches) {
                return true;
            }
        }

        return false;
    }
5259
5260
    /**
5261
     * Returns the index of the first occurrence of $needle in the string,
5262
     * and false if not found. Accepts an optional offset from which to begin
5263
     * the search.
5264
     *
5265
     * @param string $str      <p>The input string.</p>
5266
     * @param string $needle   <p>Substring to look for.</p>
5267
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5268
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5269
     *
5270
     * @return false|int
5271
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5272
     */
5273 2
    public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        // Alias: forwards all arguments unchanged to self::stripos().
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
5282
5283
    /**
5284
     * Returns the index of the last occurrence of $needle in the string,
5285
     * and false if not found. Accepts an optional offset from which to begin
5286
     * the search. Offsets may be negative to count from the last character
5287
     * in the string.
5288
     *
5289
     * @param string $str      <p>The input string.</p>
5290
     * @param string $needle   <p>Substring to look for.</p>
5291
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5292
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5293
     *
5294
     * @return false|int
5295
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5296
     */
5297 2
    public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        // Alias: forwards all arguments unchanged to self::strripos().
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
5306
5307
    /**
5308
     * Returns the index of the first occurrence of $needle in the string,
5309
     * and false if not found. Accepts an optional offset from which to begin
5310
     * the search.
5311
     *
5312
     * @param string $str      <p>The input string.</p>
5313
     * @param string $needle   <p>Substring to look for.</p>
5314
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5315
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5316
     *
5317
     * @return false|int
5318
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5319
     */
5320 12
    public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        // Alias: forwards all arguments unchanged to self::strpos().
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
5329
5330
    /**
5331
     * Returns the index of the last occurrence of $needle in the string,
5332
     * and false if not found. Accepts an optional offset from which to begin
5333
     * the search. Offsets may be negative to count from the last character
5334
     * in the string.
5335
     *
5336
     * @param string $str      <p>The input string.</p>
5337
     * @param string $needle   <p>Substring to look for.</p>
5338
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5339
     * @param string $encoding [optional] <p>Default: UTF-8</p>
5340
     *
5341
     * @return false|int
5342
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
5343
     */
5344 12
    public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        // Alias: forwards all arguments unchanged to self::strrpos().
        $index = self::strrpos($str, $needle, $offset, $encoding);

        return $index;
    }
5353
5354
    /**
5355
     * Inserts $substring into the string at the $index provided.
5356
     *
5357
     * @param string $str       <p>The input string.</p>
5358
     * @param string $substring <p>String to be inserted.</p>
5359
     * @param int    $index     <p>The index at which to insert the substring.</p>
5360
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
5361
     *
5362
     * @return string
5363
     */
5364 8
    public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
    {
        $charCount = (int) self::strlen($str, $encoding);

        // An index beyond the end of the string leaves the input unchanged.
        if ($index > $charCount) {
            return $str;
        }

        // Split at $index and glue the pieces back together around $substring.
        // self::substr() may return false; the casts make the string
        // conversion (false => '') explicit.
        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $charCount, $encoding);

        return $head . $substring . $tail;
    }
5377
5378
    /**
5379
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
5380
     *
5381
     * @see  http://php.net/manual/en/function.str-ireplace.php
5382
     *
5383
     * @param mixed $search  <p>
5384
     *                       Every replacement with search array is
5385
     *                       performed on the result of previous replacement.
5386
     *                       </p>
5387
     * @param mixed $replace <p>
5388
     *                       </p>
5389
     * @param mixed $subject <p>
5390
     *                       If subject is an array, then the search and
5391
     *                       replace is performed with every entry of
5392
     *                       subject, and the return value is an array as
5393
     *                       well.
5394
     *                       </p>
5395
     * @param int   $count   [optional] <p>
5396
     *                       The number of matched and replaced needles will
5397
     *                       be returned in count which is passed by
5398
     *                       reference.
5399
     *                       </p>
5400
     *
5401
     * @return mixed a string or an array of replacements
5402
     */
5403 29
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Build one case-insensitive, Unicode-aware pattern per search term.
        $patterns = [];
        foreach ((array) $search as $key => $term) {
            $term = (string) $term;
            $patterns[$key] = $term === ''
                ? '/^(?<=.)$/' // cannot match: an empty search term replaces nothing
                : '/' . \preg_quote($term, '/') . '/ui';
        }

        // preg_replace() interprets "$n", "${n}" and "\n" inside the replacement
        // as back-references. This method is a literal search & replace, so
        // backslashes and dollar signs are escaped to keep them verbatim
        // (e.g. a replacement of '$10' must yield '$10', not an empty string).
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replace)
            ? \array_map($escape, $replace)
            : $escape($replace);

        // $count receives the total number of replacements performed.
        // Note: preg_replace() returns null on a PCRE error (e.g. invalid UTF-8
        // in $subject); that value is passed through unchanged.
        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5422
5423
    /**
5424
     * Replaces $search from the beginning of string with $replacement.
5425
     *
5426
     * @param string $str         <p>The input string.</p>
5427
     * @param string $search      <p>The string to search for.</p>
5428
     * @param string $replacement <p>The replacement.</p>
5429
     *
5430
     * @return string string after the replacements
5431
     */
5432 17
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // With an empty search term the replacement is simply appended
        // (for an empty $str that yields the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        // An empty string cannot start with a non-empty search term.
        if ($str === '') {
            return $str;
        }

        // Byte-wise, case-insensitive prefix test.
        $startsWithSearch = \stripos($str, $search) === 0;
        if (!$startsWithSearch) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5454
5455
    /**
5456
     * Replaces $search from the ending of string with $replacement.
5457
     *
5458
     * @param string $str         <p>The input string.</p>
5459
     * @param string $search      <p>The string to search for.</p>
5460
     * @param string $replacement <p>The replacement.</p>
5461
     *
5462
     * @return string string after the replacements
5463
     */
5464 17
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // With an empty search term the replacement is simply appended
        // (for an empty $str that yields the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte offset at which $search would have to start to be the ending.
        $offset = \strlen($str) - \strlen($search);

        // $search is longer than $str, so it cannot be its ending. Without this
        // guard the negative offset may lie outside the haystack, which makes
        // stripos() emit a warning (PHP 7) or throw a ValueError (PHP 8).
        if ($offset < 0) {
            return $str;
        }

        // Byte-wise, case-insensitive test of the last strlen($search) bytes.
        if (\stripos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5486
5487
    /**
5488
     * Check if the string starts with the given substring, case insensitive.
5489
     *
5490
     * @param string $haystack <p>The string to search in.</p>
5491
     * @param string $needle   <p>The substring to search for.</p>
5492
     *
5493
     * @return bool
5494
     */
5495 12
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // Empty input on either side is never treated as a match.
        if ($haystack === '') {
            return false;
        }
        if ($needle === '') {
            return false;
        }

        // The needle is a prefix exactly when its first case-insensitive
        // occurrence is at index 0.
        $firstIndex = self::stripos($haystack, $needle);

        return $firstIndex === 0;
    }
5503
5504
    /**
5505
     * Returns true if the string begins with any of $substrings, false otherwise.
5506
     *
5507
     * - case-insensitive
5508
     *
5509
     * @param string $str        <p>The input string.</p>
5510
     * @param array  $substrings <p>Substrings to look for.</p>
5511
     *
5512
     * @return bool whether or not $str starts with $substring
5513
     */
5514 4
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // Nothing to search in, or nothing to search for.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Stop at the first candidate that str_istarts_with() accepts.
        foreach ($substrings as $candidate) {
            $matches = self::str_istarts_with($str, $candidate);
            if ($matches) {
                return true;
            }
        }

        return false;
    }
5532
5533
    /**
5534
     * Gets the substring after the first occurrence of a separator.
5535
     *
5536
     * @param string $str       <p>The input string.</p>
5537
     * @param string $separator <p>The string separator.</p>
5538
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
5539
     *
5540
     * @return string
5541
     */
5542 1
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing to split, or nothing to split on.
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator (case-insensitive) using the same encoding that
        // is used for the length / substring calls below, so that the index and
        // the cut position refer to the same character units.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Everything after the separator; strlen() may return false, hence the cast.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5564
5565
    /**
5566
     * Gets the substring after the last occurrence of a separator.
5567
     *
5568
     * @param string $str       <p>The input string.</p>
5569
     * @param string $separator <p>The string separator.</p>
5570
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
5571
     *
5572
     * @return string
5573
     */
5574 1
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing to split, or nothing to split on.
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the last separator (case-insensitive) using the same encoding
        // that is used for the length / substring calls below, so that the index
        // and the cut position refer to the same character units.
        $offset = self::str_iindex_last($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Everything after the separator; strlen() may return false, hence the cast.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5596
5597
    /**
5598
     * Gets the substring before the first occurrence of a separator.
5599
     *
5600
     * @param string $str       <p>The input string.</p>
5601
     * @param string $separator <p>The string separator.</p>
5602
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
5603
     *
5604
     * @return string
5605
     */
5606 1
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing to split, or nothing to split on.
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator (case-insensitive) using the same encoding that
        // is used for the substring call below, so that the index and the cut
        // position refer to the same character units.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Everything in front of the separator.
        return (string) self::substr($str, 0, $offset, $encoding);
    }
5623
5624
    /**
5625
     * Gets the substring before the last occurrence of a separator.
5626
     *
5627
     * @param string $str       <p>The input string.</p>
5628
     * @param string $separator <p>The string separator.</p>
5629
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
5630
     *
5631
     * @return string
5632
     */
5633 1
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // Nothing to split, or nothing to split on.
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the last separator (case-insensitive) using the same encoding
        // that is used for the substring call below, so that the index and the
        // cut position refer to the same character units.
        $offset = self::str_iindex_last($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Everything in front of the last separator.
        return (string) self::substr($str, 0, $offset, $encoding);
    }
5650
5651
    /**
     * Returns the part of the string after (or, with "$beforeNeedle", before) the first occurrence of "$needle".
     *
     * The lookup is delegated to stristr(); an empty string is returned when
     * either input is empty or the lookup reports no match.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
5683
5684
    /**
     * Returns the part of the string after (or, with "$beforeNeedle", before) the last occurrence of "$needle".
     *
     * The lookup is delegated to strrichr(); an empty string is returned when
     * either input is empty or the lookup reports no match.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
5711
5712
    /**
     * Returns the trailing $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end. Values &lt;= 0 yield an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0) {
            return '';
        }

        // A negative start makes substr() count from the end of the string.
        $tail = self::substr($str, -$n, null, $encoding);
        if ($tail === false) {
            return '';
        }

        return $tail;
    }
5731
5732
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned unchanged. Longer
     * strings are cut so that the kept part plus $strAddOn is $length
     * characters long. If $strAddOn alone is longer than $length, only
     * $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if (self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string itself, and
        // clamp at zero: a negative length would make substr() count from the
        // end of the string and produce a result longer than $length.
        $keep = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        // substr() may report failure as false; the cast turns that into ''.
        return (string) self::substr($str, 0, $keep, $encoding) . $strAddOn;
    }
5758
5759
    /**
     * Limit the number of characters in a string without cutting a word in half.
     *
     * Strings that already fit into $length are returned unchanged. Otherwise
     * the string is cut at $length, the trailing partial word is dropped and
     * $strAddOn is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if (self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        $lastIndex = $length - 1;

        // The cut falls exactly on a space: drop that space and append the add-on.
        if (self::substr($str, $lastIndex, 1, $encoding) === ' ') {
            return self::substr($str, 0, $lastIndex, $encoding) . $strAddOn;
        }

        $truncated = (string) self::substr($str, 0, $length, $encoding);

        // Remove the last (possibly partial) word of the truncated text.
        $words = \explode(' ', $truncated);
        \array_pop($words);
        $withoutPartialWord = \implode(' ', $words);

        if ($withoutPartialWord !== '') {
            return $withoutPartialWord . $strAddOn;
        }

        // No complete word is left, so fall back to a hard cut.
        return self::substr($truncated, 0, $lastIndex, $encoding) . $strAddOn;
    }
5800
5801
    /**
     * Returns the longest prefix shared by the string and $otherStr.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // The prefix cannot be longer than the shorter of the two strings.
        $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        $prefix = '';
        $pos = 0;
        while ($pos < $limit) {
            $char = self::substr($str, $pos, 1, $encoding);

            // Stop at the first position where the strings differ.
            if ($char !== self::substr($otherStr, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$pos;
        }

        return $prefix;
    }
5827
5828
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest common substring, or an empty string if either input is empty
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $strLength = self::strlen($str, $encoding);
        $otherLength = self::strlen($otherStr, $encoding);

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Extract the characters of $otherStr once instead of once per table cell.
        $otherChars = [];
        for ($j = 1; $j <= $otherLength; ++$j) {
            $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // end index (exclusive) of that match inside $str
        $table = \array_fill(
            0,
            $strLength + 1,
            \array_fill(0, $otherLength + 1, 0)
        );

        for ($i = 1; $i <= $strLength; ++$i) {
            // Does not depend on $j, so it is read outside the inner loop.
            $strChar = self::substr($str, $i - 1, 1, $encoding);

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j]) {
                    // Extend the common run that ended at the previous characters.
                    $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

                    // Strictly greater keeps the first match in case of ties.
                    if ($table[$i][$j] > $len) {
                        $len = $table[$i][$j];
                        $end = $i;
                    }
                } else {
                    $table[$i][$j] = 0;
                }
            }
        }

        $returnTmp = self::substr($str, $end - $len, $len, $encoding);

        return $returnTmp === false ? '' : $returnTmp;
    }
5879
5880
    /**
     * Returns the longest suffix shared by the string and $otherStr.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // The suffix cannot be longer than the shorter of the two strings.
        $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        $suffix = '';
        $offset = 1;
        while ($offset <= $limit) {
            // Negative start: walk both strings backwards from their last character.
            $char = self::substr($str, -$offset, 1, $encoding);

            if ($char !== self::substr($otherStr, -$offset, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$offset;
        }

        return $suffix;
    }
5906
5907
    /**
     * Checks whether $str matches the supplied regex pattern.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() returns 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
5919
5920
    /**
     * Tells whether a character exists at the given index. Negative offsets
     * count from the last character of the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = self::strlen($str, $encoding);

        // Negative offsets reach back at most $charCount positions;
        // non-negative offsets must be strictly smaller than the length.
        return $offset < 0
            ? $charCount >= \abs($offset)
            : $charCount > $offset;
    }
5942
5943
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the requested encoding, so the bounds check
        // uses the same encoding that is passed to char_at() below.
        $length = self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
5972
5973
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * The input is returned unchanged when it is empty, when $pad_string is
     * empty, or when $pad_length is not positive or smaller than the length
     * of the input.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: UTF-8</p>
     *
     * @throws \InvalidArgumentException if $pad_type is a non-integer other than "left", "right" or "both"
     *
     * @return string returns the padded string
     */
    public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = \STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // An empty pad string cannot fill anything and would be used as a
        // divisor below (division by zero), so there is nothing to do.
        if ($pad_string === '') {
            return $str;
        }

        $str_length = self::strlen($str, $encoding);

        if (
            $pad_length > 0
            &&
            $pad_length >= $str_length
        ) {
            $ps_length = self::strlen($pad_string, $encoding);

            // Number of characters that have to be added in total.
            $diff = ($pad_length - $str_length);

            // In every branch the pad string is repeated often enough to
            // cover the gap and then cut down to the exact character count.
            switch ($pad_type) {
                case \STR_PAD_LEFT:
                    $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                    $pre = (string) self::substr($pre, 0, $diff, $encoding);
                    $post = '';

                    break;

                case \STR_PAD_BOTH:
                    // The left side gets the smaller half, the right side the larger one.
                    $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                    $pre = (string) self::substr($pre, 0, (int) \floor($diff / 2), $encoding);
                    $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                    $post = (string) self::substr($post, 0, (int) \ceil($diff / 2), $encoding);

                    break;

                case \STR_PAD_RIGHT:
                default:
                    $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                    $post = (string) self::substr($post, 0, $diff, $encoding);
                    $pre = '';
            }

            return $pre . $str . $post;
        }

        return $str;
    }
6047
6048
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for pad() with a $padType of 'both'.
     *
     * When the missing length is odd, the right side receives the extra character.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Total number of characters missing to reach $length.
        $missing = $length - self::strlen($str, $encoding);

        $left = (int) \floor($missing / 2);
        $right = (int) \ceil($missing / 2);

        return self::apply_padding($str, $left, $right, $padStr, $encoding);
    }
6065
6066
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for pad() with a $padType of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure the input in the requested encoding (as str_pad_both() does),
        // so the amount of padding matches the encoding handed to apply_padding().
        $missing = $length - self::strlen($str, $encoding);

        return self::apply_padding($str, $missing, 0, $padStr, $encoding);
    }
6081
6082
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for pad() with a $padType of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure the input in the requested encoding (as str_pad_both() does),
        // so the amount of padding matches the encoding handed to apply_padding().
        $missing = $length - self::strlen($str, $encoding);

        return self::apply_padding($str, 0, $missing, $padStr, $encoding);
    }
6097
6098
    /**
     * Repeat a string.
     *
     * The input is passed through filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        $filtered = self::filter($str);

        return \str_repeat($filtered, $multiplier);
    }
6122
6123
    /**
     * Replace all occurrences of the search string with the replacement string.
     *
     * INFO: This is only a wrapper for the native "str_replace()" -> the original function is already UTF-8 safe.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace($search, $replace, $subject, int &$count = null)
    {
        // $count is handed through by reference, so the caller sees the native counter.
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6156
6157
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * With an empty $search the replacement is appended to the string. When
     * the string does not start with $search it is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // An empty search term matches nothing; the replacement is simply appended
        // (this also covers an empty $str, where the result is $replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        $searchBytes = \strlen($search);

        // Byte-wise comparison of the leading bytes against the search term.
        if (\strncmp($str, $search, $searchBytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $searchBytes);
    }
6188
6189
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * With an empty $search the replacement is appended to the string. When
     * the string does not end with $search (including the case where $search
     * is longer than the string) it is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // An empty search term matches nothing; the replacement is simply appended
        // (this also covers an empty $str, where the result is $replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        $strBytes = \strlen($str);
        $searchBytes = \strlen($search);

        // A search term longer than the string can never match. Bailing out
        // here also avoids computing an out-of-range negative offset.
        if ($searchBytes > $strBytes) {
            return $str;
        }

        // Number of leading bytes that stay untouched if the ending matches.
        $keepBytes = $strBytes - $searchBytes;

        if (\substr($str, $keepBytes) === $search) {
            return \substr($str, 0, $keepBytes) . $replacement;
        }

        return $str;
    }
6220
6221
    /**
     * Replace the first occurrence of the "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged when the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);
        if ($firstPos === false) {
            return $subject;
        }

        // Swap exactly the matched stretch (length of $search) for $replace.
        return self::substr_replace($subject, $replace, $firstPos, (int) self::strlen($search));
    }
6239
6240
    /**
     * Replace the last occurrence of the "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged when the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string
     */
    public static function str_replace_last(string $search, string $replace, string $subject): string
    {
        $lastPos = self::strrpos($subject, $search);
        if ($lastPos === false) {
            return $subject;
        }

        // Swap exactly the matched stretch (length of $search) for $replace.
        return self::substr_replace($subject, $replace, $lastPos, (int) self::strlen($search));
    }
6258
6259
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str <p>The input string</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str): string
    {
        // range(0, -1) would yield [0, -1] rather than an empty list, so the
        // empty string is handled up front instead of looping over bogus indexes.
        if ($str === '') {
            return '';
        }

        $indexes = \range(0, self::strlen($str) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // Rebuild the string by picking the characters in the shuffled order.
        $shuffledStr = '';
        foreach ($indexes as $i) {
            $shuffledStr .= self::substr($str, $i, 1);
        }

        return $shuffledStr;
    }
6281
6282
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string if a non-negative <i>end</i>
     *                      is not greater than <i>start</i>.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.</p>
     */
    public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
    {
        if ($end === null) {
            // No end given: the full length is enough to reach the end of the string.
            $length = (int) self::strlen($str, $encoding);
        } elseif ($end >= 0 && $end <= $start) {
            return '';
        } elseif ($end < 0) {
            // Negative end: translate it into a length, measured in the same
            // encoding that substr() uses below.
            $length = (int) self::strlen($str, $encoding) + $end - $start;
        } else {
            $length = $end - $start;
        }

        return self::substr($str, $start, $length, $encoding);
    }
6311
6312
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes become underscores, every digit is wrapped in underscores, every
     * "A-Z" letter is lower-cased and prefixed with an underscore, whitespace
     * runs become a single underscore and repeated / leading / trailing
     * underscores are removed.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        $str = self::normalize_whitespace($str);
        $str = \str_replace('-', '_', $str);

        // Inside a character class "|" is a literal pipe, not an alternation,
        // so the class lists only digits and upper-case letters.
        $str = (string) \preg_replace_callback(
            '/([\dA-Z])/u',
            static function ($matches) use ($encoding) {
                $match = $matches[1];
                $matchInt = (int) $match;

                // The match survives an int round-trip: it is a digit.
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/',        // convert spaces to "_"
                '/^\s+|\s+$/',  // trim leading & trailing spaces
                '/_+/',         // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        $str = self::trim($str, '_'); // trim leading & trailing "_"

        return self::trim($str); // trim leading & trailing whitespace
    }
6358
6359
    /**
     * Sort all characters according to code points.
     *
     * The string is turned into a list via codepoints(), sorted, and turned
     * back into a string via string().
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // Flipping twice drops duplicate values.
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
6384
6385
    /**
     * Alias for "UTF8::split()"; both arguments are forwarded unchanged.
     *
     * @see UTF8::split()
     *
     * @param string|string[] $str
     * @param int             $len
     *
     * @return string[]
     */
    public static function str_split($str, int $len = 1): array
    {
        $chunks = self::split($str, $len);

        return $chunks;
    }
6399
6400
    /**
     * Splits the string at every occurrence of $pattern and returns the parts
     * as an array of strings. An optional integer $limit truncates the results.
     *
     * NOTE: $pattern is escaped with preg_quote() before it is used, so it is
     * matched literally and regex meta characters have no special meaning.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>The delimiter at which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        // An empty pattern cannot split anything: hand back the whole string.
        if ($pattern === '') {
            return [$str];
        }

        // preg_split() puts the unsplit remainder into the last slot when a
        // limit is given, so one extra slot is requested and dropped afterwards.
        $splitLimit = $limit > 0 ? $limit + 1 : -1;

        $regex = '/' . \preg_quote($pattern, '/') . '/u';
        $parts = \preg_split($regex, $str, $splitLimit);

        if ($parts === false) {
            return [];
        }

        if ($splitLimit > 0 && \count($parts) === $splitLimit) {
            \array_pop($parts);
        }

        return $parts;
    }
6443
6444
    /**
     * Check if the string starts with the given substring.
     *
     * INFO: An empty $haystack or an empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // The needle is a prefix exactly when its first occurrence is at position 0.
        $firstOccurrence = \strpos($haystack, $needle);

        return $firstOccurrence === 0;
    }
6460
6461
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // Nothing can match an empty input or an empty candidate list.
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6489
6490
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string the part after the separator, or an empty string if $str
     *                or $separator is empty or the separator is not found
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator with the same encoding that is used for the
        // separator length and the substring below, so that offset and length
        // are measured in the same unit.
        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6521
6522
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string the part after the separator, or an empty string if $str
     *                or $separator is empty or the separator is not found
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator with the same encoding that is used for the
        // separator length and the substring below, so that offset and length
        // are measured in the same unit.
        $offset = self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6553
6554
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string the part before the separator, or an empty string if $str
     *                or $separator is empty or the separator is not found
     */
    public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset is used as a length for substr() below, so it has to be
        // measured with the same encoding.
        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6585
6586
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string the part before the separator, or an empty string if $str
     *                or $separator is empty or the separator is not found
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset is used as a length for substr() below, so it has to be
        // measured with the same encoding.
        $offset = self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6617
6618
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * INFO: The result is whatever UTF8::strstr() returns for the same arguments;
     *       a "false" result is turned into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string an empty string if $str or $needle is empty or nothing was found
     */
    public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6650
6651
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * INFO: The result is whatever UTF8::strrchr() returns for the same arguments;
     *       a "false" result is turned into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string an empty string if $str or $needle is empty or nothing was found
     */
    public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6678
6679
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string $str with $substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return $substring . $str . $substring;
    }
6691
6692
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of the word lower-cased. The input is trimmed first unless
     * $useTrimFirst is false. Also accepts an array, $ignore, allowing you to
     * list words that are left untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Words are compared strictly (case-sensitive).
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($useTrimFirst === true) {
            $str = self::trim($str);
        }

        // Every piece returned by str_to_words() is processed and the pieces
        // are glued back together without any separator.
        $pieces = self::str_to_words($str);

        foreach ($pieces as $index => $piece) {
            if ($ignore && \in_array($piece, $ignore, true)) {
                continue;
            }

            $lowered = self::strtolower(
                $piece,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );

            $pieces[$index] = self::str_upper_first(
                $lowered,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );
        }

        return \implode('', $pieces);
    }
6747
6748
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * INFO: The entries of $ignore are merged into the built-in list of small
     *       words and inserted into the regular expressions without escaping,
     *       so they are interpreted as regex fragments.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = self::trim($str);

        // An input without any lowercase character is lower-cased as a whole
        // before the substitutions run. The encoding is forwarded here like in
        // every other case conversion of this method.
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // The main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            static function ($matches) use ($encoding) {
                // Preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // Preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // Lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // Capitalize word w/o internal caps
                    $str .= static::str_upper_first($matches[4], $encoding);
                } else {
                    // Preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            static function ($matches) use ($encoding) {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            static function ($matches) use ($encoding) {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            static function ($matches) use ($encoding) {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            static function ($matches) use ($encoding) {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
6892
6893
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big-endian number and that
     * number is written with the digits "0" and "1", without leading zeros.
     * An empty string (or a string of only NUL bytes) yields "0".
     *
     * INFO: The conversion is done digit by digit instead of calling
     *       base_convert() on the whole hex string, because base_convert()
     *       loses precision for large numbers and therefore returned wrong
     *       digits for inputs longer than a few bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        $unpacked = \unpack('H*', $str);
        if ($unpacked === false) {
            return '0';
        }

        // Each hex digit maps to exactly four binary digits.
        $bits = '';
        foreach (\str_split((string) $unpacked[1]) as $hexDigit) {
            if ($hexDigit === '') {
                continue;
            }

            $bits .= \str_pad(\base_convert($hexDigit, 16, 2), 4, '0', \STR_PAD_LEFT);
        }

        // Same format as before: no leading zeros, but at least one digit.
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
6906
6907
    /**
     * Split a string into lines.
     *
     * INFO: A run of one or two "\r" / "\n" characters counts as a single
     *       line break, so e.g. "\n\n" does not produce an empty line.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // Result for an empty input and for a failed split.
        $fallback = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $lines = \preg_split("/[\r\n]{1,2}/u", $str);
        if ($lines === false) {
            return $fallback;
        }

        // Without any filter option the raw split result is returned.
        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $lines;
        }

        return self::reduce_string_array(
            $lines,
            $removeEmptyValues,
            $removeShortValues
        );
    }
6940
6941
    /**
     * Convert a string into an array of words.
     *
     * INFO: The split keeps the delimiters (PREG_SPLIT_DELIM_CAPTURE), so
     *       without any filter option the result contains the words as well
     *       as the text between them.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // Result for an empty input and for a failed split.
        $fallback = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $charList = self::rxClass($charList, '\pL');

        $pieces = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $fallback;
        }

        // Without any filter option the raw split result is returned.
        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $pieces;
        }

        $reduced = self::reduce_string_array(
            $pieces,
            $removeEmptyValues,
            $removeShortValues
        );

        // Make sure that every remaining entry is a string.
        foreach ($reduced as $key => $value) {
            $reduced[$key] = (string) $value;
        }

        return $reduced;
    }
6985
6986
    /**
     * Alias of "UTF8::to_ascii()".
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str     <p>Handed to UTF8::to_ascii() unchanged.</p>
     * @param string $unknown [optional] <p>Handed to UTF8::to_ascii() unchanged. Default: ?</p>
     * @param bool   $strict  [optional] <p>Handed to UTF8::to_ascii() unchanged. Default: false</p>
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7001
7002
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: If $substring is longer than a non-negative $length it cannot fit
     *       and is not appended; the string is then only cut to $length.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        // Nothing to cut if the string already fits.
        if ($length >= self::strlen($str, $encoding)) {
            return $str;
        }

        // Need to further trim the string so we can append the substring
        $substringLength = (int) self::strlen($substring, $encoding);

        // A substring that is longer than the desired length would make the
        // remaining length negative, which cuts from the end of the string
        // and returns more characters than requested. Drop it instead.
        if ($length >= 0 && $substringLength > $length) {
            $substring = '';
            $substringLength = 0;
        }

        $truncated = self::substr($str, 0, $length - $substringLength, $encoding);
        if ($truncated === false) {
            return $substring;
        }

        return $truncated . $substring;
    }
7035
7036
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: If $substring is longer than a non-negative $length it cannot fit
     *       and is not appended.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        // Nothing to cut if the string already fits.
        if ($length >= self::strlen($str, $encoding)) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $substringLength = (int) self::strlen($substring, $encoding);

        // A substring that is longer than the desired length would make the
        // remaining length negative, which cuts from the end of the string
        // and returns more characters than requested. Drop it instead.
        if ($length >= 0 && $substringLength > $length) {
            $substring = '';
            $substringLength = 0;
        }

        $length -= $substringLength;

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // if the last word was truncated
        $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
        if ($strPosSpace !== $length) {
            // find pos of the last occurrence of a space, get up to that
            $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

            if ($lastPos !== false || $strPosSpace !== false) {
                $truncated = self::substr($truncated, 0, (int) $lastPos, $encoding);
            }
        }

        return $truncated . $substring;
    }
7077
7078
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: This is UTF8::str_delimit() with "_" as the delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
7092
7093
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: The string is first passed to UTF8::str_camelize() and the result
     *       is passed to UTF8::str_upper_first().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $camelized = self::str_camelize($str, $encoding);

        return self::str_upper_first(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
7110
7111
    /**
     * Alias of "UTF8::ucfirst()".
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str                   <p>Handed to UTF8::ucfirst() unchanged.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Default: false</p>
     * @param string|null $lang                  [optional] <p>Default: null</p>
     * @param bool        $tryToKeepStringLength [optional] <p>Default: false</p>
     *
     * @return string
     */
    public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        return self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
7128
7129
    /**
     * Counts number of words in the UTF-8 string.
     *
     * INFO: The words are taken from the odd indexes of the result of
     *       UTF8::str_to_words(); the even indexes are treated as the text
     *       between the words.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string, or the words themselves (format 1 and 2)
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        if ($format === 1) {
            // Plain list of the words.
            $words = [];
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        if ($format === 2) {
            // Words keyed by their offset, starting after the leading non-word part.
            $wordsByOffset = [];
            $offset = self::strlen($parts[0]);
            for ($index = 1; $index < $partCount; $index += 2) {
                $wordsByOffset[$offset] = $parts[$index];
                $offset += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
            }

            return $wordsByOffset;
        }

        // Any other format: only the number of words.
        return (int) (($partCount - 1) / 2);
    }
7166
7167
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are
     *       passed through UTF8::strtocasefold() before they are compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
7188
7189
    /**
     * Alias of "UTF8::strstr()": every argument is forwarded unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string whatever UTF8::strstr() returns for the same arguments
     */
    public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        return self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
7206
7207
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings are reported as equal right away. Otherwise both strings
     * are normalized to NFD and compared with the native strcmp(). If the
     * normalization of a string fails (e.g. because it is not valid UTF-8), the
     * string is compared as it is instead of raising a type error.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @noinspection PhpUndefinedClassInspection */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @noinspection PhpUndefinedClassInspection */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false on failure; with strict types
        // enabled that value must not reach \strcmp(), so use the raw input then.
        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }
7226
7227
    /**
     * Find length of initial segment not matching mask.
     *
     * If $offset and / or $length are given, only that part of the string is
     * examined and the returned length is counted from the start of that part.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $charList <p>The characters that end the segment. If empty, the whole examined part counts.</p>
     * @param int    $offset   [optional] <p>Character position at which the examined part starts.</p>
     * @param int    $length   [optional] <p>Length of the examined part, in characters.</p>
     *
     * @return int the number of characters in front of the first character from $charList;
     *             0 if the examined part is empty or could not be extracted
     */
    public static function strcspn(string $str, string $charList, int $offset = null, int $length = null): int
    {
        // Cut out the requested part first, so that every result below
        // (including the one for an empty $charList) refers to that part.
        if ($offset !== null || $length !== null) {
            /** @noinspection UnnecessaryCastingInspection */
            $strTmp = self::substr($str, (int) $offset, $length);
            if ($strTmp === false) {
                return 0;
            }
            $str = $strTmp;
        }

        if ($str === '') {
            return 0;
        }

        // nothing can stop the segment, so it spans the whole examined part
        if ($charList === '') {
            return (int) self::strlen($str);
        }

        // lazily capture everything in front of the first character from $charList
        if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
            // self::strlen() may return false, which is reported as 0
            return (int) self::strlen($matches[1]);
        }

        // no character from $charList was found
        return (int) self::strlen($str);
    }
7267
7268
    /**
     * Alias of "UTF8::stristr()": every argument is forwarded unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string whatever UTF8::stristr() returns for the same arguments
     */
    public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        return self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
7285
7286
    /**
     * Build a string out of a list of code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every element is converted with UTF8::chr() and the results are joined
     * in their original order.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $chars = \array_map([self::class, 'chr'], $array);

        return \implode('', $chars);
    }
7308
7309
    /**
     * Tell whether the string begins with one of the known "BOM" (Byte Order Mark) sequences.
     *
     * The candidates are the keys of the internal BOM table.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        foreach (\array_keys(self::$BOM) as $bom) {
            $position = \strpos($str, $bom);
            if ($position === 0) {
                return true;
            }
        }

        return false;
    }
7328
7329
    /**
     * Remove HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               Tags which should not be stripped; passed on to the
     *                               native strip_tags() (null is passed as an empty string).
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $cleanUtf8      [optional] <p>Run the string through UTF8::clean() before stripping.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        return \strip_tags($input, $allowable_tags ?? '');
    }
7362
7363
    /**
     * Remove every whitespace character from the string. This covers tabs and
     * newlines, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input without any whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        // preg_replace() yields null on error, which becomes an empty string here
        return (string) $stripped;
    }
7380
7381
    /**
     * Case-insensitive search for the first occurrence of a string within another.
     *
     * The search is tried with mbstring first, then with intl, then with the
     * native stripos() for pure ASCII input, and finally by case-folding both
     * strings and delegating to UTF8::strpos(). A backend that reports "not found"
     * does not end the search; the next one is tried.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     *                   (also if haystack or needle is empty)
     */
    public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are located in front of the needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // first choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $mbPosition = \mb_stripos($haystack, $needle, $offset, $encoding);
            if ($mbPosition !== false) {
                return $mbPosition;
            }
        }

        //
        // second choice: intl
        // INFO: "grapheme_stripos()" can handle neither other encodings nor a negative offset
        //

        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePosition = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePosition !== false) {
                return $graphemePosition;
            }
        }

        //
        // fallback for ascii only
        //

        $asciiOnly = self::is_ascii($haystack) && self::is_ascii($needle);
        if ($asciiOnly) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: compare the case-folded strings
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
7454
7455
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case-insensitive).
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also if haystack or needle is empty)
     */
    public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if the cleaning removed all of it.
        // Compare strictly: a loose check would also treat the needle "0" as empty.
        if ($needle === '') {
            return $haystack;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // first choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle) && self::is_ascii($haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the captured prefix; what remains starts with the needle
        return self::substr($haystack, (int) self::strlen($match[1]));
    }
7533
7534
    /**
     * Count the characters of a string (not its bytes).
     *
     * "CP850" and "ASCII" are measured in bytes. For every other encoding the
     * length is taken from mbstring, iconv or intl (in this order, as far as they
     * are available and do not fail), then from the native strlen() for pure
     * ASCII input, and finally from a regex based character count.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // binary || ascii only: count the bytes
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::strlen_in_byte($str);
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length
            // if the string contains invalid characters
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $mbLength = \mb_strlen($str, $encoding);
            if ($mbLength !== false) {
                return $mbLength;
            }
        }

        //
        // via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // via intl
        // INFO: "grapheme_strlen()" can't handle other encodings
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // ascii only: bytes and characters are the same
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // vanilla php: count the characters matched by the regex
        //

        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        // no match at all although the string has content: the length is unknown
        return ($charCount === 0 && isset($str[0])) ? false : $charCount;
    }
7651
7652
    /**
     * Count the bytes of a string.
     *
     * If the "mbstring_func_overload" support flag is set, the count is taken from
     * mb_strlen() with the 8-bit "CP850" encoding, otherwise from the native strlen().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int the length in bytes
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
7676
7677
    /**
     * Compare two strings in "natural order" without regard to letter case.
     *
     * INFO: Both strings are case-folded via UTF8::strtocasefold() and the results
     *       are then compared with UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>The charset that is passed on to the case-folding.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
7698
7699
    /**
     * Compare two strings in "natural order".
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Identical strings are reported as equal right away; otherwise both strings
     * are passed through UTF8::strtonatfold() and compared with the native strnatcmp().
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 . '' === $str2 . '') {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
7718
7719
    /**
     * Compare the first n characters of two strings without regard to letter case.
     *
     * INFO: Both strings are case-folded via UTF8::strtocasefold() and the results
     *       are then compared with UTF8::strncmp().
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>The charset that is passed on to the case-folding.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
7742
7743
    /**
     * Compare the first n characters of two strings.
     *
     * Both strings are cut to $len characters with UTF8::substr() and the
     * results are compared with UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     * @param int    $len  <p>Number of characters to use in the comparison.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(string $str1, string $str2, int $len): int
    {
        // a failed substr() (false) is compared as an empty string
        return self::strcmp(
            (string) self::substr($str1, 0, $len),
            (string) self::substr($str2, 0, $len)
        );
    }
7764
7765
    /**
     * Find the first character of a string that belongs to a given set of characters.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     *                      (also if haystack or char_list is empty)
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // cut the haystack at the first occurrence of the matched text
        $start = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $start);
    }
7787
7788
    /**
     * Find position of first occurrence of string in a string.
     *
     * "CP850" and "ASCII" are searched byte-wise. For every other encoding the
     * search is tried with mbstring, intl and iconv (as far as they are available
     * and can handle the arguments), then with the native strpos() for pure ASCII
     * input, and finally with a byte search whose result is converted into a
     * character position. A backend that reports "not found" does not end the
     * search; the next one is tried.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
     *                              A negative offset is counted from the end of the haystack.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if (\is_int($needle) && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strpos_in_byte($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        $haystackIsAscii = self::is_ascii($haystack);
        if ($haystackIsAscii && self::is_ascii($needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // Translate an offset that is counted from the end into the absolute
            // character position, so that the result below stays relative to the
            // start of the haystack (an offset beyond the start means "from the start").
            $haystackLength = $haystackIsAscii
                ? \strlen($haystack)
                : (int) self::strlen($haystack, $encoding);
            $offset = \max(0, $haystackLength + $offset);
        }

        // only search in the part of the haystack that starts at the offset
        if ($haystackIsAscii) {
            $haystackTmp = \substr($haystack, $offset);
        } else {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        }
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position within the searched part
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position;
            // self::strlen() may return false, which is counted as 0
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset + 0;
    }
7938
7939
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * If the "mbstring_func_overload" support flag is set, the search is done by
     * mb_strpos() with the 8-bit "CP850" encoding, otherwise by the native strpos().
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to look for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or haystack / needle
     *                   is empty), it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
7972
7973
    /**
     * Returns the part of a string before or from the last occurrence of a needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string to search in.</p>
     * @param string $needle        <p>
     *                              The string to search for.
     *                              The iconv and plain-PHP fallbacks reduce it to its first character.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return haystack from its beginning up to the last occurrence
     *                              of needle.<br>
     *                              <b>false</b>: return haystack from the last occurrence of needle to its end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the requested part of haystack, or false if needle is not found
     */
    public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCommonEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" report a wrong position
            // when invalid characters precede $needle in $haystack
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($isByteEncoding && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        //
        // fallback via iconv, or via vanilla php when iconv is missing:
        // both look up the first character of the needle only
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $firstChar = (string) $firstChar;

        $lastPos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $firstChar, $encoding)
            : self::strrpos($haystack, $firstChar, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8086
8087
    /**
     * Builds a copy of the string with its characters in reverse order.
     *
     * The string is walked backwards one character at a time via
     * UTF8::strlen() and UTF8::substr().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $result = '';
        for ($index = (int) self::strlen($str) - 1; $index >= 0; --$index) {
            $result .= self::substr($str, $index, 1);
        }

        return $result;
    }
8108
8109
    /**
     * Case-insensitive variant of strrchr(): returns the part of a string before
     * or from the last occurrence of a needle.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string to search in.</p>
     * @param string $needle        <p>
     *                              The string to search for.
     *                              The plain-PHP fallback reduces it to its first character.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return haystack from its beginning up to the last occurrence
     *                              of needle.<br>
     *                              <b>false</b>: return haystack from the last occurrence of needle to its end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the requested part of haystack or<br>false if needle is not found
     */
    public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCommonEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" report a wrong position
            // when invalid characters precede $needle in $haystack
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: look up the first character of the needle only
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }

        $lastPos = self::strripos($haystack, (string) $firstChar, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8179
8180
    /**
     * Finds the position of the last case-insensitive occurrence of a needle.
     *
     * Strategies are tried in this order: byte-wise search for "CP850" / "ASCII",
     * mbstring, intl (UTF-8 with a non-negative offset only), byte-wise search for
     * pure ASCII input, and finally case folding followed by UTF8::strrpos().
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for. A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $needle = ((int) $needle === $needle && $needle >= 0)
            ? (string) self::chr($needle)
            : (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCommonEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        $intlUsable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            && $offset >= 0 // grapheme_strripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
8285
8286
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a needle.
     *
     * Uses mb_strripos() with the "CP850" charset when "mbstring.func_overload"
     * was detected, otherwise the native strripos().
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] <p>The position in haystack to start searching.</p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
8321
8322
    /**
     * Find position of last occurrence of a string in a string.
     *
     * Strategies are tried in this order: byte-wise search for "CP850" / "ASCII",
     * mbstring, intl (UTF-8 with a non-negative offset only), byte-wise search for
     * pure ASCII input, and finally a plain-PHP fallback that converts a byte
     * position back into a character position.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //
        // NOTE: $offset is declared as a non-nullable int, so no null check is needed here.
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the searchable window first:
        // a positive offset drops leading characters (and is added back to the result),
        // a negative offset drops trailing characters.
        $haystackTmp = null;
        if ($offset > 0) {
            $haystackTmp = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $haystackTmp = self::substr($haystack, 0, $offset);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        $pos = self::strrpos_in_byte($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // Convert the byte position into a character position by measuring
        // the length of everything that precedes the match.
        $strTmp = self::substr_in_byte($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        return $offset + (int) self::strlen($strTmp);
    }
8462
8463
    /**
     * Byte-wise search for the last occurrence of a needle.
     *
     * Uses mb_strrpos() with the "CP850" charset when "mbstring.func_overload"
     * was detected, otherwise the native strrpos().
     *
     * @param string $haystack <p>The string being checked, for the last occurrence of needle.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strrpos($haystack, $needle, $offset);
    }
8497
8498
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str    <p>The input string.</p>
     * @param string $mask   <p>The mask of chars</p>
     * @param int    $offset [optional] <p>Start position; applied via UTF8::substr() before matching.</p>
     * @param int    $length [optional] <p>Maximum length to inspect; applied via UTF8::substr() before matching.</p>
     *
     * @return int the length of the matching initial segment, 0 if there is none
     */
    public static function strspn(string $str, string $mask, int $offset = 0, int $length = null)
    {
        if ($offset || $length !== null) {
            $strTmp = self::substr($str, $offset, $length);
            if ($strTmp === false) {
                $strTmp = '';
            }
            $str = (string) $strTmp;
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // Keep the matches in their own variable instead of overwriting the
        // string subject with the array that preg_match() fills by reference.
        $matches = [];
        if (\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
            return (int) self::strlen($matches[0]);
        }

        return 0;
    }
8525
8526
    /**
     * Returns the part of haystack from the first occurrence of needle to the end,
     * or the part before it.
     *
     * Strategies are tried in this order: byte-wise search for "CP850" / "ASCII",
     * mbstring, intl (UTF-8 only), byte-wise search for pure ASCII input, and
     * finally a regular-expression based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" report a wrong position
            // when invalid characters precede $needle in $haystack
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCommonEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via intl
        //

        $intlUsable = $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
            && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($graphemeResult !== false) {
                return $graphemeResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the first needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $prefix = $found[1];

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
8629
8630
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * Uses mb_strstr() with the "CP850" charset when "mbstring.func_overload"
     * was detected, otherwise the native strstr().
     *
     * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return haystack from its beginning up to the first occurrence
     *                              of needle.<br>
     *                              <b>false</b>: return haystack from the first occurrence of needle to its end.
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850') // 8-BIT
            : \strstr($haystack, $needle, $before_needle);
    }
8669
8670
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through UTF8::fixStrCaseHelper() and then
     * converted with UTF8::strtolower() or UTF8::strtoupper().
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // only a strict boolean true selects the lowercase variant
        return $lower === true
            ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
    }
8708
8709
    /**
     * Make a string lowercase.
     *
     * When a language is given and intl is available, the conversion is done by
     * an ICU transliterator ("<lang>-Lower", or "Any-Lower" if that id is unknown);
     * the $encoding parameter is not used on that path. In every other case
     * mb_strtolower() is used.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all alphabetic characters converted to lowercase
     */
    public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Lower';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);

                // transliterator_transliterate() returns false on failure; returning that
                // would violate the ": string" return type, so fall through to "mb_strtolower()"
                if ($transliterated !== false) {
                    return $transliterated;
                }
            } else {
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
8769
8770
    /**
     * Make a string uppercase.
     *
     * <p>If a language is given and "intl" is available, the conversion is done via
     * the ICU transliterator ("<lang>-Upper", or "Any-Upper" if that id is unknown);
     * otherwise "mb_strtoupper()" is used.</p>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all alphabetic characters converted to uppercase
     */
    public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);
                if ($transliterated !== false) {
                    return $transliterated;
                }

                // the transliterator failed: returning "false" would violate the
                // declared return type, so use the generic conversion below instead
            } else {
                \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
8830
8831
    /**
     * Translate characters or replace sub-strings.
     *
     * <p>With "$to" given, "$from" and "$to" are split via "str_split()", the longer
     * list is cut to the length of the shorter one and the resulting pairs are
     * handed to "strtr()".</p>
     * <p>Without "$to", a string "$from" is removed from "$str" and any other
     * "$from" value is passed to "strtr()" as the replace-pairs argument.</p>
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if "$from" and "$to" cannot be combined into replace-pairs
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = \INF): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        if ($to !== \INF) {
            $from = self::str_split($from);
            $to = self::str_split($to);
            $countFrom = \count($from);
            $countTo = \count($to);

            if ($countFrom > $countTo) {
                $from = \array_slice($from, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $to = \array_slice($to, 0, $countFrom);
            }

            // keep the combined pairs in their own variable, so that the
            // exception message below still shows the original "$from" values
            $pairs = \array_combine($from, $to);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
8878
8879
    /**
     * Return the width of a string.
     *
     * <p>Uses "mb_strwidth()" if mbstring is available. Otherwise every character
     * matched by the wide-character pattern below counts as two columns and every
     * remaining character as one.</p>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        $isPlainEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($isPlainEncoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // preferred way: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strwidth($str, $encoding);
        }

        // vanilla php: work on an UTF-8 copy of the input
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the wide characters and remember how many were removed
        $wideCount = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        $narrowCount = (int) self::strlen($remaining, 'UTF-8');

        return ($wideCount * 2) + $narrowCount;
    }
8929
8930
    /**
     * Get part of a string.
     *
     * <p>Tries, in this order: byte-wise handling for "CP850" / "ASCII", mbstring,
     * intl ("grapheme_substr()"), iconv, native "substr()" for ASCII-only input and
     * finally a split-into-characters fallback.</p>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str       <p>The string being checked.</p>
     * @param int      $offset    <p>The first position used in str.</p>
     * @param int|null $length    [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>An empty string is returned if
     *                      <i>offset</i> lies behind the end of <i>str</i>; <b>FALSE</b> is
     *                      returned if the string length cannot be determined in the
     *                      fallback path.
     */
    public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return '';
        }

        // Empty string
        if ($length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // Whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::substr_in_byte($str, $offset, $length);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
            if ($return !== false) {
                return $return;
            }
        }

        // otherwise we need the string-length and can't fake it via "2147483647"
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // Empty string: the offset points exactly behind the last character.
        // INFO: "$length === 0" was already handled above, so only "null" is left here
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // Impossible
        if ($offset && $offset > $str_length) {
            // "false" is the php native return type here,
            //  but we optimized this for performance ... see "2147483647" instead of "strlen"
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // pass the encoding explicitly, otherwise iconv uses its ini default
            $returnTmp = \iconv_substr($str, $offset, $length, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
9086
9087
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * <p>If an offset or a length is given, "$str1" is cut to that range first and
     * "$str2" is cut to the length of the remaining "$str1" before both are compared.</p>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
    {
        $needsCut = ($offset !== 0 || $length !== null);

        if ($needsCut) {
            // cut the main string to the requested range
            $mainPart = self::substr($str1, $offset, $length);
            $str1 = $mainPart === false ? '' : (string) $mainPart;

            // compare only as many characters as are left in the main string
            $otherPart = self::substr($str2, 0, (int) self::strlen($str1));
            $str2 = $otherPart === false ? '' : (string) $otherPart;
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2)
            : self::strcmp($str1, $str2);
    }
9131
9132
    /**
     * Count the number of substring occurrences.
     *
     * <p>If an offset or a length is given, the haystack is cut to that range via
     * "substr()" before the occurrences are counted.</p>
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring. It outputs a warning if the offset plus the length is
     *                            greater than the haystack length.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number of occurrences, or <b>FALSE</b> if the haystack or the needle
     *                   is empty, if the haystack length cannot be determined, or (before
     *                   PHP 7.1) if offset plus length is not positive
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                // measure the haystack in the same encoding that is used to cut it below
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars from both inputs before counting
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
9223
9224
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * <p>Without "mbstring.func_overload" this is a thin wrapper around the native
     * "substr_count()". With the overload active, the haystack is cut in bytes and
     * counted via "mb_substr_count()" with an 8-bit encoding.</p>
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
    {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                // the length is used as a byte length below, so it must be measured
                // in bytes ("mb_" is available if overload is used) and not in characters
                $lengthTmp = \mb_strlen($haystack, 'CP850'); // 8-BIT
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
9301
9302
    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $caseSensitive to false.
     *
     * <p>INFO: for backward compatibility a non-boolean "$caseSensitive" value is
     * used as the encoding.</p>
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
    {
        $nothingToCount = ($str === '' || $substring === '');
        if ($nothingToCount) {
            return 0;
        }

        // only a fallback to prevent BC in the api ...
        $isRealBool = ($caseSensitive === false || $caseSensitive === true);
        if ($isRealBool === false) {
            $encoding = (string) $caseSensitive;
        }

        if (!$caseSensitive) {
            // fold the case of both strings, so that the comparison ignores it
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

        return (int) $occurrences;
    }
9332
9333
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * <p>The haystack is returned unchanged if it does not start with the needle.</p>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $rest = self::substr($haystack, (int) self::strlen($needle));

        return $rest === false ? '' : (string) $rest;
    }
9361
9362
    /**
     * Get part of a string process in bytes.
     *
     * <p>Uses "mb_substr()" with an 8-bit encoding if "mbstring.func_overload" is
     * active, otherwise the native "substr()".</p>
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to cut from, or nothing requested
        if ($str === '' || $length === 0) {
            return '';
        }

        // no offset and no limit: the whole string
        if ($length === null && !$offset) {
            return $str;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // "no limit" is faked via the biggest 32-bit integer
        $maxLength = $length ?? 2147483647;

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \substr($str, $offset, $maxLength);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $maxLength, 'CP850'); // 8-BIT
    }
9401
9402
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * <p>The haystack is returned unchanged if it does not end with the needle.</p>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keepLength = self::strlen($haystack) - self::strlen($needle);
        $head = self::substr($haystack, 0, $keepLength);

        return $head === false ? '' : (string) $head;
    }
9430
9431
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack).
     *
     * <p>The haystack is returned unchanged if it does not start with the needle.</p>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $rest = self::substr($haystack, (int) self::strlen($needle));

        return $rest === false ? '' : (string) $rest;
    }
9459
9460
    /**
9461
     * Replace text within a portion of a string.
9462
     *
9463
     * source: https://gist.github.com/stemar/8287074
9464
     *
9465
     * @param string|string[] $str         <p>The input string or an array of stings.</p>
9466
     * @param string|string[] $replacement <p>The replacement string or an array of stings.</p>
9467
     * @param int|int[]       $offset      <p>
9468
     *                                     If start is positive, the replacing will begin at the start'th offset
9469
     *                                     into string.
9470
     *                                     <br><br>
9471
     *                                     If start is negative, the replacing will begin at the start'th character
9472
     *                                     from the end of string.
9473
     *                                     </p>
9474
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
9475
     *                                     portion of string which is to be replaced. If it is negative, it
9476
     *                                     represents the number of characters from the end of string at which to
9477
     *                                     stop replacing. If it is not given, then it will default to strlen(
9478
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
9479
     *                                     length is zero then this function will have the effect of inserting
9480
     *                                     replacement into string at the given start offset.</p>
9481
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
9482
     *
9483
     * @return string|string[] The result string is returned. If string is an array then array is returned.
9484
     */
9485 10
    public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
9486
    {
9487 10
        if (\is_array($str) === true) {
9488 1
            $num = \count($str);
9489
9490
            // the replacement
9491 1
            if (\is_array($replacement) === true) {
9492 1
                $replacement = \array_slice($replacement, 0, $num);
9493
            } else {
9494 1
                $replacement = \array_pad([$replacement], $num, $replacement);
9495
            }
9496
9497
            // the offset
9498 1
            if (\is_array($offset) === true) {
9499 1
                $offset = \array_slice($offset, 0, $num);
9500 1
                foreach ($offset as &$valueTmp) {
9501 1
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
9502
                }
9503 1
                unset($valueTmp);
9504
            } else {
9505 1
                $offset = \array_pad([$offset], $num, $offset);
9506
            }
9507
9508
            // the length
9509 1
            if ($length === null) {
9510 1
                $length = \array_fill(0, $num, 0);
9511 1
            } elseif (\is_array($length) === true) {
9512 1
                $length = \array_slice($length, 0, $num);
9513 1
                foreach ($length as &$valueTmpV2) {
9514 1
                    if ($valueTmpV2 !== null) {
9515 1
                        $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
9516
                    } else {
9517 1
                        $valueTmpV2 = 0;
9518
                    }
9519
                }
9520 1
                unset($valueTmpV2);
9521
            } else {
9522 1
                $length = \array_pad([$length], $num, $length);
9523
            }
9524
9525
            // recursive call
9526 1
            return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
9527
        }
9528
9529 10
        if (\is_array($replacement) === true) {
9530 1
            if (\count($replacement) > 0) {
9531 1
                $replacement = $replacement[0];
9532
            } else {
9533 1
                $replacement = '';
9534
            }
9535
        }
9536
9537
        // init
9538 10
        $str = (string) $str;
9539 10
        $replacement = (string) $replacement;
9540
9541 10
        if (\is_array($length) === true) {
9542
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
9543
        }
9544
9545 10
        if (\is_array($offset) === true) {
9546
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
9547
        }
9548
9549 10
        if ($str === '') {
9550 1
            return $replacement;
9551
        }
9552
9553 9
        if (self::is_ascii($str)) {
9554 6
            return ($length === null) ?
9555
                \substr_replace($str, $replacement, $offset) :
9556 6
                \substr_replace($str, $replacement, $offset, $length);
9557
        }
9558
9559 8
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9560
            self::checkForSupport();
9561
        }
9562
9563 8
        if (self::$SUPPORT['mbstring'] === true) {
9564 8
            $string_length = self::strlen($str, $encoding);
9565
9566 8
            if ($offset < 0) {
9567 1
                $offset = (int) \max(0, $string_length + $offset);
9568 8
            } elseif ($offset > $string_length) {
9569
                $offset = (int) $string_length;
9570
            }
9571
9572 8
            if ($length < 0) {
9573 1
                $length = (int) \max(0, $string_length - $offset + $length);
9574 8
            } elseif ($length === null || $length > $string_length) {
9575 3
                $length = (int) $string_length;
9576
            }
9577
9578 8
            if (($offset + $length) > $string_length) {
9579 3
                $length = $string_length - $offset;
9580
            }
9581
9582 8
            return self::substr($str, 0, $offset, $encoding) . $replacement . self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
0 ignored issues
show
Bug introduced by
Are you sure self::substr($str, $offs...t - $length, $encoding) of type false|string can be used in concatenation? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9582
            return self::substr($str, 0, $offset, $encoding) . $replacement . /** @scrutinizer ignore-type */ self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
Loading history...
Bug introduced by
Are you sure self::substr($str, 0, $offset, $encoding) of type false|string can be used in concatenation? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9582
            return /** @scrutinizer ignore-type */ self::substr($str, 0, $offset, $encoding) . $replacement . self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
Loading history...
9583
        }
9584
9585
        \preg_match_all('/./us', $str, $smatches);
9586
        \preg_match_all('/./us', $replacement, $rmatches);
9587
9588
        if ($length === null) {
9589
            $lengthTmp = self::strlen($str, $encoding);
9590
            if ($lengthTmp === false) {
9591
                // e.g.: non mbstring support + invalid chars
9592
                return '';
9593
            }
9594
            $length = (int) $lengthTmp;
9595
        }
9596
9597
        \array_splice($smatches[0], $offset, $length, $rmatches[0]);
9598
9599
        return \implode('', $smatches[0]);
9600
    }
9601
9602
    /**
     * Strips a trailing $needle from $haystack, if (and only if) the haystack ends with it.
     *
     * @param string $haystack <p>The string to inspect.</p>
     * @param string $needle   <p>The suffix that should be removed.</p>
     *
     * @return string <p>An empty string for an empty haystack, the haystack without the suffix when it ends with
     *                $needle, otherwise the unchanged haystack.</p>
     */
    public static function substr_right(string $haystack, string $needle): string
    {
        // empty haystack => '' (which is the haystack itself), empty needle => nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_ends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix (lengths are measured via self::strlen())
        $keepLength = self::strlen($haystack) - self::strlen($needle);
        $prefix = self::substr($haystack, 0, $keepLength);

        return $prefix === false ? '' : (string) $prefix;
    }
9630
9631
    /**
     * Returns a case swapped version of the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Pass the string through "UTF8::clean()" before swapping.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        // only the two well-known encoding names skip the normalization step
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($isKnownEncoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $lower = self::strtolower($str, $encoding);
        $upper = self::strtoupper($str, $encoding);

        // Byte-wise XOR: where lower and upper are equal they cancel out and the original byte remains,
        // where they differ the result is the variant the original byte was *not*.
        // NOTE(review): this presumably relies on both case variants having the same byte length as $str.
        return (string) ($lower ^ $upper ^ $str);
    }
9658
9659
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function of an extension exists ("mb_strlen" / "iconv")
     * although the extension itself ("mbstring" / "iconv") is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        // "mbstring" missing, but "mb_strlen()" is callable
        if (\extension_loaded('mbstring') === false && \function_exists('mb_strlen')) {
            return true;
        }

        // "iconv" missing, but "iconv()" is callable
        return \extension_loaded('iconv') === false && \function_exists('iconv');
    }
9682
9683
    /**
     * Replaces every tab character in the string with a run of spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that replace one tab.</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $spaces, $str);
    }
9693
9694
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase (the work is delegated to "UTF8::str_titleize()").
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // the two well-known encoding names are passed through untouched
        $isKnownEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
        if ($isKnownEncoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return self::str_titleize(
            $str,
            null,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength,
            false
        );
    }
9714
9715
    /**
     * Alias of "UTF8::to_ascii()"; all arguments are forwarded unchanged.
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
9732
9733
    /**
     * Alias of "UTF8::to_iso8859()"; the argument is forwarded unchanged.
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
9748
9749
    /**
     * Alias of "UTF8::to_latin1()"; the argument is forwarded unchanged.
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
9764
9765
    /**
     * Alias of "UTF8::to_utf8()"; the argument is forwarded unchanged.
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
9780
9781
    /**
     * Convert a string into ASCII.
     *
     * Pure ASCII input is returned untouched. Everything else is passed through "UTF8::clean()" and then
     * processed character by character: the code point of each non-ASCII character is decoded from its
     * UTF-8 bytes and looked up in a transliteration table. The tables are split into "banks"
     * (code point >> 8), loaded on demand via "getDataIfExists()" and kept in a static variable.
     * Characters without a table entry are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // lazy loaded transliteration tables, indexed by bank (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if ($strict === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
                /** @noinspection PhpComposerExtensionStubsInspection */
                $str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

                // check again, if we only have ASCII, now ...
                if (self::is_ascii($str) === true) {
                    return $str;
                }
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into single (multi-byte) characters
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            // Reset per character: otherwise a lead byte that matches none of the branches below
            // would silently reuse the code point of the previous character.
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // 2 byte sequence
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3 byte sequence
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4 byte sequence
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        // 5 byte sequence (legacy UTF-8 form)
                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            // 6 byte sequence (legacy UTF-8 form)
                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // 0xFE / 0xFF never start a sequence
            if ($ordC0 === 254 || $ordC0 === 255) {
                $c = $unknown;

                continue;
            }

            // no branch above was able to decode a code point
            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // load the table of this bank only once
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            // position of the character inside its bank
            $newchar = $ord & 255;

            // debugging hint: \sprintf('x%02x', $bank) is the name of the data file for this character
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {
                $c = $unknown;
            }
        }
        // break the reference of the by-reference loop variable
        unset($c);

        return \implode('', $chars);
    }
9941
9942
    /**
     * Interprets a value as boolean.
     *
     * The value is cast to string; "true", "1", "on", "yes" (case-insensitive) give true,
     * "false", "0", "off", "no" give false, numeric strings are true when greater than zero,
     * everything else is true when something is left after "UTF8::trim()".
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $str = (string) $str;
        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $normalized = \strtolower($str);

        if (\in_array($normalized, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($normalized, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        if (\is_numeric($str)) {
            $number = (float) $str + 0;

            return $number > 0;
        }

        $trimmed = self::trim($str);

        return (bool) $trimmed;
    }
9981
9982
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Steps: every character that is not a letter (a-z, A-Z), a digit, a dot, a hyphen, whitespace or the
     * $fallback_char is removed, runs of whitespace are replaced by $fallback_char, repeated $fallback_char's
     * are collapsed into one and $fallback_char is finally trimmed from both ends.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate [optional] <p>true === run "UTF8::str_transliterate()" first; by default
     *                                  no transliteration is done and unsafe characters are simply removed.</p>
     * @param string $fallback_char     [optional] <p>Replacement for whitespace; an empty string removes the
     *                                  whitespace entirely.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                            // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty $fallback_char would build the invalid pattern "/[]+/": "preg_replace()" then
        // returns null and the whole filename is lost, so the collapse step is only added if needed.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        if ($fallback_char === '') {
            // nothing to trim
            return $string;
        }

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
10017
10018
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (keys are kept), everything else is cast to string
     * and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            // "array_map()" with exactly one array preserves the keys
            return \array_map([self::class, 'to_iso8859'], $str);
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
10042
10043
    /**
     * Alias of "UTF8::to_iso8859()"; the argument is forwarded unchanged.
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
10056
10057
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays are converted element by element
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $byteCount = self::strlen_in_byte($str);
        $result = '';

        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byteCount; ++$pos) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                if (($lead & "\xC0") === "\x80") {
                    // continuation byte without a lead byte - needs conversion
                    $result .= self::to_utf8_convert_helper($lead);
                } else {
                    // it doesn't need conversion
                    $result .= $lead;
                }

                continue;
            }

            // how many continuation bytes does the lead byte announce?
            if ($lead <= "\xDF") {
                $expected = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $expected = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $expected = 3; // looks like 4 bytes UTF8
            } else {
                $expected = 0; // doesn't look like UTF8, but should be converted
            }

            // collect the continuation bytes, bytes behind the end of the string count as "\x00"
            $tail = '';
            $looksLikeUtf8 = $expected > 0;
            for ($offset = 1; $looksLikeUtf8 && $offset <= $expected; ++$offset) {
                $next = $pos + $offset >= $byteCount ? "\x00" : $str[$pos + $offset];

                if ($next >= "\x80" && $next <= "\xBF") {
                    $tail .= $next;
                } else {
                    $looksLikeUtf8 = false;
                }
            }

            if ($looksLikeUtf8) {
                // yeah, almost sure it's UTF8 already
                $result .= $lead . $tail;
                $pos += $expected;
            } else {
                // not valid UTF8 - convert it
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences
        $result = \preg_replace_callback(
            '/\\\\u([0-9a-f]{4})/i',
            static function ($match) {
                // always fallback via symfony polyfill
                return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
            },
            $result
        );

        if ($result === null) {
            return '';
        }

        // decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
10165
10166
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * Without $chars, every leading / trailing character of the unicode categories "Z" (separator)
     * and "C" (other, e.g. control chars) is stripped.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars [optional] <p>Optional characters to be stripped; the string "0" is a valid
     *                      character list.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is falsy in PHP, but it is a perfectly valid list of characters to strip
        $useDefaultChars = $chars === \INF || ($chars !== '0' && !$chars);

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultChars) {
            $pattern = '^[\pZ\pC]+|[\pZ\pC]+$';
        } else {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}", which is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
10195
10196
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the rest; only the first character is passed
     * to "UTF8::strtoupper()", the rest is appended unchanged.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // everything behind the first character
        $rest = self::substr($str, 1, null, $encoding);
        if ($rest === false) {
            $rest = '';
        }

        // the first character, upper-cased
        $firstChar = (string) self::substr($str, 0, 1, $encoding);
        $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $firstCharUpper . $rest;
    }
10230
10231
    /**
     * Alias of "UTF8::ucfirst()"; the three arguments are forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
10246
10247
    /**
     * Uppercase for all words in the string.
     *
     * INFO: "mb_convert_case($str, MB_CASE_TITLE)" is not used here, because it does not only uppercase
     * the first letter, it also lowercases all other letters.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // strict comparison: the string "0" is falsy, but it is not empty
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the native function is only an option, if neither a charlist nor exceptions are in use
        // (compared against '' so that a charlist / exception of "0" is not ignored)
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $newWords = [];

        $useExceptions = \count($exceptions) > 0;

        foreach ($words as $word) {
            // skip empty parts only - a word like "0" must stay in the result
            if ((string) $word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }

            $newWords[] = $word;
        }

        return \implode('', $newWords);
    }
10311
10312
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (until the string does not change anymore).</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // rewrite "%uXXXX" sequences into html-entities, they are decoded in the loop below
        $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscapePattern, $str)) {
            $str = (string) \preg_replace($unicodeEscapePattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        while (true) {
            $previous = $str;

            $asUtf8 = self::to_utf8($str);
            $entitiesDecoded = self::html_entity_decode($asUtf8, $flags);
            $str = self::fix_simple_utf8(\urldecode($entitiesDecoded));

            // one pass only, or stop as soon as a pass changes nothing
            if ($multi_decode !== true || $previous === $str) {
                break;
            }
        }

        return $str;
    }
10359
10360
    /**
     * Returns a lookup table from "urlencoded" Windows-1252 bytes ("%20" ... "%FF") to UTF-8 strings.
     *
     * Keys are upper-case percent escapes, values are the replacement strings.
     * Some entries map to an empty string, i.e. the byte is dropped.
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[]
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => '',
            // 0x80 is the euro sign in Windows-1252 (was wrongly mapped to a backtick).
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
10596
10597
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten byte by byte: a two-byte sequence whose code point is
     * below 256 becomes one byte, every other multi-byte sequence becomes "?",
     * all remaining bytes are copied unchanged.
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the string as it was before the byte-wise decoding is returned
     *                              whenever the decoded result is not shorter (measured via self::strlen()).
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        static $UTF8_TO_WIN1252_KEYS_CACHE = null;
        static $UTF8_TO_WIN1252_VALUES_CACHE = null;

        if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        /** @noinspection PhpInternalEntityUsedInspection */
        $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // save for later comparison
        $str_backup = $str;
        $len = self::strlen_in_byte($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // $i is the read position, $j the write position; $j never overtakes $i,
        // so the string can be rewritten in place.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // The high nibble of the current byte identifies the sequence length.
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // A lead byte at the very end has no continuation byte:
                    // do not read past the end of the string.
                    if ($i + 1 >= $len) {
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // four-byte sequence: skip one byte more than for "\xE0"
                    ++$i;
                // no break
                case "\xE0":
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // Only the first $j bytes were (re)written, the rest is leftover input.
        $return = self::substr_in_byte($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
10683
10684
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * After the native conversion, every key of the "win1252_to_utf8" data table
     * found in the result is replaced by its value.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        if ($encoded === false) {
            return '';
        }

        // Without a "\xC2" byte the table lookup below is skipped.
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        static $search = null;
        static $replace = null;

        if ($search === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $search = \array_keys(self::$WIN1252_TO_UTF8);
            $replace = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($search, $replace, $encoded);
    }
10723
10724
    /**
     * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
10737
10738
    /**
     * Returns the table of utf8 whitespace characters.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E. [email protected]
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
10753
10754
    /**
     * Cuts a string after a maximum number of words.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The maximum number of words; values below 1 produce an empty string.</p>
     * @param string $strAddOn <p>Appended to the result when the string was actually shortened.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // Leading whitespace followed by up to $limit "word + trailing whitespace" groups.
        \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

        $wasShortened = isset($matches[0])
                        &&
                        self::strlen($str) !== self::strlen($matches[0]);

        if ($wasShortened) {
            return self::rtrim($matches[0]) . $strAddOn;
        }

        return $str;
    }
10785
10786
    /**
     * Wraps a string to a given number of characters.
     *
     * The characters are replaced by a single-byte mask ("?" for a character, " " for a
     * space, "#" for an existing break), the mask is wrapped with the native function and
     * the resulting break positions are transferred back to the real characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string the given string wrapped at the specified column
     */
    public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
    {
        if ($str === '' || $break === '') {
            return '';
        }

        // Build the list of characters and, in parallel, the mask with one byte per list entry.
        $mask = '';
        $chars = [];
        foreach (\explode($break, $str) as $segmentIndex => $segment) {
            if ($segmentIndex > 0) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $charIndex = 0;
        $maskIndex = -1;
        $breakPos = -1;

        while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {
            // Copy every character in front of the break position.
            for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex++]);
            }

            // A break that replaces an original break or a space consumes that character.
            if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
                unset($chars[$charIndex++]);
            }

            $wrapped .= $break;
        }

        // Whatever was not consumed belongs to the last line.
        return $wrapped . \implode('', $chars);
    }
10853
10854
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split at "\r\n", "\r" and "\n"; every line is wrapped on its own and
     * terminated with "\n" (so the result always ends with a line break).
     * Wrapping goes through self::wordwrap(), so that multi-byte characters are
     * counted as one character each instead of byte-wise.
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width per line.</p>
     *
     * @return string
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            // nothing could be split: same (empty) result as before
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            $wrapped .= self::wordwrap($line, $limit) . "\n";
        }

        return $wrapped;
    }
10878
10879
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
10888
10889
    /**
     * Adds the specified amount of left and right padding to the given string.
     * The default character used is a space.
     *
     * The padding side is derived from which of the two lengths is non-zero:
     * only $left pads left, only $right pads right, otherwise both sides are padded.
     *
     * @param string $str
     * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
     * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
     * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // The unused side is 0, so the target length is the same sum in every case.
        $length = ($left + $right) + self::strlen($str, $encoding);

        if ($left && !$right) {
            $type = \STR_PAD_LEFT;
        } elseif ($right && !$left) {
            $type = \STR_PAD_RIGHT;
        } else {
            $type = \STR_PAD_BOTH;
        }

        return self::str_pad($str, $length, $padStr, $type, $encoding);
    }
10921
10922
    /**
     * Replaces case variants in a string via the case-fold lookup tables.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = $useLower === true;

        // common cases: direction depends on the requested target case
        if ($toLower) {
            $search = self::$COMMON_CASE_FOLD['upper'];
            $replace = self::$COMMON_CASE_FOLD['lower'];
        } else {
            $search = self::$COMMON_CASE_FOLD['lower'];
            $replace = self::$COMMON_CASE_FOLD['upper'];
        }

        $str = (string) \str_replace($search, $replace, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // full case folding: the table is loaded only once
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        $from = $toLower ? 0 : 1;
        $to = $toLower ? 1 : 0;

        return (string) \str_replace($FULL_CASE_FOLD[$from], $FULL_CASE_FOLD[$to], $str);
    }
10963
10964
    /**
     * Includes "/data/<file>.php" (relative to this class) and returns its result.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return mixed
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        return include $path;
    }
10976
10977
    /**
     * Includes "/data/<file>.php" (relative to this class) if that file exists.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed will return false on error
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        return include $path;
    }
10994
10995
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // Without the constant there is nothing to compare against.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return (bool) (@\ini_get('mbstring.func_overload') & \MB_OVERLOAD_STRING);
    }
11012
11013
    /**
     * Filters a list of strings; the result is re-indexed from 0.
     *
     * @param array $strings
     * @param bool  $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
     * @param int   $removeShortValues <p>Drop values whose length is less than or equal to this number (null = keep all).</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = [];

        foreach ($strings as $value) {
            $isTooShort = $removeShortValues !== null
                          &&
                          self::strlen($value) <= $removeShortValues;

            if (
                $isTooShort
                ||
                ($removeEmptyValues === true && \trim($value) === '')
            ) {
                continue;
            }

            $kept[] = $value;
        }

        return $kept;
    }
11047
11048
    /**
     * Builds (and caches) a regex fragment that matches any of the characters of $s.
     *
     * Pieces of $s that fit into a character class are collected in one "[...]" class,
     * all other pieces become alternatives of a non-capturing group.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Content that the character class starts with.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $cache = [];

        $cacheKey = $s . $class;

        if (!isset($cache[$cacheKey])) {
            $charClass = $class;
            $alternatives = [];

            foreach (self::str_split($s) as $piece) {
                if ($piece === '-') {
                    // a literal "-" has to be the first thing inside the class
                    $charClass = '-' . $charClass;
                } elseif (!isset($piece[2])) {
                    // at most two bytes: quote it for usage inside the pattern
                    $charClass .= \preg_quote($piece, '/');
                } elseif (self::strlen($piece) === 1) {
                    $charClass .= $piece;
                } else {
                    $alternatives[] = $piece;
                }
            }

            if ($charClass) {
                $charClass = '[' . $charClass . ']';
            }

            if ($alternatives === []) {
                $rx = $charClass;
            } else {
                $rx = '(?:' . \implode('|', \array_merge([$charClass], $alternatives)) . ')';
            }

            $cache[$cacheKey] = $rx;
        }

        return $cache[$cacheKey];
    }
11096
11097
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The string is split at $delimiter and every part gets an upper-case first character,
     * unless it is a known lower-case particle or starts with a known prefix.
     *
     * @param string $names
     * @param string $delimiter
     * @param string $encoding
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        $parts = \explode($delimiter, $names);

        if ($parts === false) {
            return '';
        }

        // whole parts that are left untouched
        $particles = [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ];

        // beginnings that leave the part untouched
        $prefixes = [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ];

        foreach ($parts as $index => $part) {
            if (\in_array($part, $particles, true)) {
                continue;
            }

            $keepAsIs = false;

            // for hyphenated names a particle at the beginning is enough
            if ($delimiter === '-') {
                foreach ($particles as $particle) {
                    if (self::strpos($part, $particle, 0, $encoding) === 0) {
                        $keepAsIs = true;

                        break;
                    }
                }
            }

            if ($keepAsIs === false) {
                foreach ($prefixes as $prefix) {
                    if (self::strpos($part, $prefix, 0, $encoding) === 0) {
                        $keepAsIs = true;

                        break;
                    }
                }
            }

            if ($keepAsIs === false) {
                $parts[$index] = self::str_upper_first($part);
            }
        }

        return \implode($delimiter, $parts);
    }
11185
11186
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and all non-spacing marks (\p{Mn}) are removed.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>An empty string if the input could not be normalized.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $normalized = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure with false, which must not be
        // passed on to preg_replace() under strict types.
        if ($normalized === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $normalized);
    }
11198
11199
    /**
     * Converts a single character into its UTF-8 representation.
     *
     * Characters listed in the "win1252_to_utf8" data table are taken from that table,
     * every other character is encoded as a two-byte sequence.
     *
     * @param int|string $input <p>The character, used as key of the "ord" data table.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Bitwise operators on strings work byte-wise here (intended, no logical operators).
            // ">> 6" is the integer form of "/ 64": a float array key would be truncated implicitly.
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
11232
}
11233