Passed
Push — master ( 252a0e...676b2b )
by Lars
03:55
created

UTF8::normalize_line_ending()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Byte-order-mark sequence => length of that sequence in bytes.
     *
     * Every BOM is listed twice: once as the raw byte sequence and once as
     * the text that results when those bytes were read as "WINDOWS-1252"
     * (one such character may take more than one byte).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // "ï»¿" spelled as byte escapes: a literal here is easily stripped by
        // tooling, which would leave an empty-string key with length 6.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $UTF8_MSWORD;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $BROKEN_UTF8_FIX;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $WIN1252_TO_UTF8;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $CHR;
219
220
    /**
     * Creates an instance and triggers the environment support check.
     */
    public function __construct()
    {
        static::checkForSupport();
    }
227
228
    /**
     * Return the character at the given position ($str[1]-like access).
     *
     * @param string $str <p>A UTF-8 string.</p>
     * @param int    $pos <p>The position of the character to return.</p>
     *
     * @return string a single multi-byte character, or an empty string when
     *                $str is empty or $pos is negative
     */
    public static function access(string $str, int $pos): string
    {
        // Neither an empty string nor a negative position can yield a character.
        if ($str === '' || $pos < 0) {
            return '';
        }

        $char = self::substr($str, $pos, 1);

        return (string) $char;
    }
248
249
    /**
     * Makes sure the string starts with the UTF-8 BOM.
     *
     * INFO: A string that already has a BOM is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string, prefixed with the BOM if it was missing
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
266
267
    /**
     * Adds the specified amount of left and right padding to the given string.
     * The default character used is a space.
     *
     * The work is delegated to self::str_pad(): the target length is the
     * string length plus both padding amounts, and the pad type is chosen
     * from which of $left / $right are non-zero.
     *
     * @param string $str
     * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
     * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
     * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // $encoding now has a default: a required parameter after optional
        // ones is deprecated since PHP 8.0 and contradicted the docblock.

        // The side that is not padded contributes 0, so one sum covers every case.
        $length = $left + $right + self::strlen($str, $encoding);

        if ($left && !$right) {
            $type = \STR_PAD_LEFT;
        } elseif ($right && !$left) {
            $type = \STR_PAD_RIGHT;
        } else {
            // Both sides requested, or no padding at all.
            $type = \STR_PAD_BOTH;
        }

        return self::str_pad($str, $length, $padStr, $type, $encoding);
    }
299
300
    /**
     * Changes the case of all keys in an array.
     *
     * @param array $array <p>The array to work on</p>
     * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                     or <strong>CASE_LOWER</strong> (default); any other
     *                     value is treated as CASE_LOWER</p>
     *
     * @return array a new array with its keys lower or uppercased
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER): array
    {
        // Only an explicit CASE_UPPER selects upper-casing; everything else lower-cases.
        $toUpper = ($case === \CASE_UPPER);

        $converted = [];
        foreach ($array as $originalKey => $value) {
            $newKey = $toUpper
                ? self::strtoupper($originalKey)
                : self::strtolower($originalKey);

            $converted[$newKey] = $value;
        }

        return $converted;
    }
332
333
    /**
     * Returns the substring enclosed by $start and $end, or an empty string
     * if either delimiter is not found or nothing lies between them. The
     * search for $start begins at $offset.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
    {
        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        // The wanted content begins right behind the start delimiter.
        $contentStart = $startPos + self::strlen($start, $encoding);

        $endPos = self::strpos($str, $end, $contentStart, $encoding);
        if ($endPos === false || $endPos === $contentStart) {
            return '';
        }

        $content = self::substr($str, $contentStart, $endPos - $contentStart, $encoding);

        return $content === false ? '' : $content;
    }
371
372
    /**
     * Convert a string of binary digits into the bytes it represents.
     *
     * The digits are converted to hexadecimal via base_convert() and then
     * packed into bytes. An empty input, or one that converts to zero,
     * yields an empty string.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $hex = \base_convert($bin, 2, 16);
        if ($hex === '0') {
            return '';
        }

        // pack('H*') reads two hex digits per byte and pads a dangling digit
        // on the right ("a" => 0xA0). Left-pad so the value stays intact
        // ("a" => 0x0A).
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
392
393
    /**
     * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
     *
     * INFO: take a look at self::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
404
405
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
419
420
    /**
     * Returns the single character found at $index (indexes start at 0).
     *
     * @param string $str
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
433
434
    /**
     * Splits the string into an array of its individual characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $chars = self::str_split($str, 1);

        return $chars;
    }
445
446
    /**
     * Detects which UTF-8 related extensions / features the server offers
     * and records the results in self::$SUPPORT.
     *
     * INFO: You don't need to run it manually, it will be triggered if it's needed.
     * The detection runs only once; later calls return immediately.
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // mbstring: http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        // iconv: http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // intl: http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();
        self::$SUPPORT['intl__transliterator_list_ids'] = [];

        // The transliterator IDs are only collected when intl is really usable.
        if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids') === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // IntlChar: http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // ctype: http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // finfo: http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // json: http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // pcre: http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    }
494
495
    /**
     * Generates a character from the given code point, UTF-8 encoded unless
     * another $encoding is requested.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoized per code point + encoding in a function-static
     * cache. Code points up to 127 are read from the self::$CHR lookup table;
     * larger ones come from IntlChar when available, otherwise the UTF-8
     * bytes are assembled from the same table.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // memo of already generated characters
        static $generated = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // $encoding is final from here on.
        $isUtf8 = ($encoding === 'UTF-8');

        if (
            !$isUtf8
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $memoKey = $code_point . $encoding;
        if (isset($generated[$memoKey]) === true) {
            return $generated[$memoKey];
        }

        // use "simple"-char only until "\x80"
        if ($code_point <= 127) {
            if (self::$CHR === null) {
                $table = self::getData('chr');
                if ($table) {
                    self::$CHR = (array) $table;
                }
            }

            $chr = self::$CHR[$code_point];
            if (!$isUtf8) {
                $chr = self::encode($encoding, $chr);
            }

            return $generated[$memoKey] = $chr;
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
            if (!$isUtf8) {
                $chr = self::encode($encoding, $chr);
            }

            return $generated[$memoKey] = $chr;
        }

        // Fallback: build the UTF-8 byte sequence from the lookup table.
        if (self::$CHR === null) {
            $table = self::getData('chr');
            if ($table) {
                self::$CHR = (array) $table;
            }
        }

        $cp = (int) $code_point;
        if ($cp <= 0x7F) {
            // 1 byte
            $chr = self::$CHR[$cp];
        } elseif ($cp <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            $chr = self::$CHR[($cp >> 6) + 0xC0]
                 . self::$CHR[($cp & 0x3F) + 0x80];
        } elseif ($cp <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            $chr = self::$CHR[($cp >> 12) + 0xE0]
                 . self::$CHR[(($cp >> 6) & 0x3F) + 0x80]
                 . self::$CHR[($cp & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $chr = self::$CHR[($cp >> 18) + 0xF0]
                 . self::$CHR[(($cp >> 12) & 0x3F) + 0x80]
                 . self::$CHR[(($cp >> 6) & 0x3F) + 0x80]
                 . self::$CHR[($cp & 0x3F) + 0x80];
        }

        if (!$isUtf8) {
            $chr = self::encode($encoding, $chr);
        }

        return $generated[$memoKey] = $chr;
    }
594
595
    /**
     * Splits the string into characters and applies the callback to each one.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        return \array_map($callback, self::split($str));
    }
609
610
    /**
     * Generates an array holding the byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $characters = self::split($str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Without mbstring function overloading the native strlen() is used directly.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \array_map('\strlen', $characters);
        }

        // With overloading, lengths are measured via self::strlen_in_byte() instead.
        return \array_map(
            function ($character) {
                return self::strlen_in_byte($character);
            },
            $characters
        );
    }
645
646
    /**
     * Get the decimal code representation of a specific character.
     *
     * The lead byte decides how many continuation bytes follow; the payload
     * bits of all bytes are then combined into one integer.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $value = self::ord($char[0]);

        // 0xxxxxxx: the byte already is the code.
        if (!($value & 0x80)) {
            return $value;
        }

        if (($value & 0xe0) === 0xc0) {
            // 110xxxxx: one continuation byte
            $continuationBytes = 1;
            $value &= ~0xc0;
        } elseif (($value & 0xf0) === 0xe0) {
            // 1110xxxx: two continuation bytes
            $continuationBytes = 2;
            $value &= ~0xe0;
        } elseif (($value & 0xf8) === 0xf0) {
            // 11110xxx: three continuation bytes
            $continuationBytes = 3;
            $value &= ~0xf0;
        } else {
            // Not a recognized lead byte: the value is returned untouched.
            $continuationBytes = 0;
        }

        for ($offset = 1; $offset <= $continuationBytes; ++$offset) {
            // 10xxxxxx: append the payload bits of each continuation byte
            $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $value;
    }
684
685
    /**
     * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx, or an empty string
     *                for an empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // The entity "&#0;" is converted as if it were an empty character.
        $input = ($char === '&#0;') ? '' : $char;

        return self::int_to_hex(self::ord($input), $pfix);
    }
705
706
    /**
     * Alias of UTF8::chr_to_decimal(): forwards the character unchanged.
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
719
720
    /**
     * Splits a string into chunks of $chunklen characters and joins them
     * with $end. The separator is placed between chunks only; nothing is
     * appended after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::split($body, $chunklen);

        return \implode($end, $chunks);
    }
733
734
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in this fixed order: diamond question mark,
     * invisible characters, whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; stray bytes land in
        // groups 2 / 3 and are dropped, because only "$1" is kept.
        $validUtf8Pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($validUtf8Pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
801
802
    /**
     * Cleans up a string so that only printable UTF-8 chars remain + fixes UTF-8 encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fix ISO <-> UTF-8 errors first
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // ... no MS Word normalization
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
836
837
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
     *                                    default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        // A plain string is split into characters first; arrays are used as given.
        $items = \is_string($arg) === true ? self::split($arg) : $arg;

        $toCodePoint = [self::class, 'ord'];
        $codePoints = \array_map($toCodePoint, $items);

        if (\count($codePoints) === 0) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        $toHex = [self::class, 'int_to_hex'];

        return \array_map($toHex, $codePoints);
    }
881
882
    /**
     * Replaces every run of whitespace characters with a single space and
     * trims the result. This includes tabs and newline characters, as well
     * as multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

        return self::trim($condensed);
    }
897
898
    /**
     * Counts how often each character occurs in a string.
     *
     * @param string $str       <p>The input string.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int[] an associative array with the characters as keys and
     *               their number of occurrences as values
     */
    public static function count_chars(string $str, bool $cleanUtf8 = false): array
    {
        // Split into chunks of length 1, then tally identical chunks.
        $chars = self::split($str, 1, $cleanUtf8);

        return \array_count_values($chars);
    }
911
912
    /**
     * Removes css media-queries (the "@media ... { ... }" blocks) from a css string.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string the css without the matched media-query blocks; if the regular
     *                expression engine fails, the input is returned unchanged
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $stripped = \preg_replace(
            '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU',
            '',
            $str
        );

        // preg_replace() returns null on a PCRE error (e.g. backtrack limit reached).
        // Casting that null to string would silently drop the whole stylesheet,
        // so keep the original input instead.
        return $stripped ?? $str;
    }
927
928
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $extension = 'ctype';

        return \extension_loaded($extension);
    }
938
939
    /**
     * Converts an int-value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is
     * then decoded via UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal value, concatenated as-is into the entity.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
950
951
    /**
     * Decodes a MIME header field.
     *
     * Uses iconv_mime_decode() when iconv support was detected, otherwise
     * falls back to mb_decode_mimeheader().
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, $encoding = 'UTF-8')
    {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Preferred path: iconv decodes straight into the requested charset
        // and keeps going when it hits a malformed part.
        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // NOTE(review): in this fallback the charset conversion is applied to the
        // still MIME-encoded header, i.e. before decoding - confirm this order is intended.
        $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($input);
    }
981
982
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so you can call it on a string that is already UTF-8 without messing it up.
     *
     * Besides real charsets, the pseudo-encodings "JSON", "BASE64" and "HTML-ENTITIES" are handled
     * for both directions. Input that cannot be converted is returned unchanged; this includes
     * a $str that is not valid Base64 when $fromEncoding is "BASE64".
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the source encoding is auto-detected and a
     *                                       successful detection overrides $fromEncoding; if the detection
     *                                       fails, the string is passed through UTF8::to_utf8().</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       An empty string will trigger the autodetect anyway.</p>
     *
     * @return string
     */
    public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
    {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // Nothing to do when source and target are the same.
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // --- pseudo-encodings: encoding returns at once, decoding continues
        // --- with the decoded string and an unknown (auto-detected) source

        if ($toEncoding === 'JSON') {
            return self::json_encode($str);
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                // Strict decoding failed: the input is not Base64. Hand it back
                // untouched instead of carrying "false" into the string-only code below.
                return $str;
            }

            $str = $decoded;
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // --- source-encoding detection

        $fromEncodingDetected = false;
        if ($autodetectFromEncoding === true || !$fromEncoding) {
            $fromEncodingDetected = self::str_detect_encoding($str);
        }

        if ($fromEncodingDetected !== false) {
            $fromEncoding = $fromEncodingDetected;
        } elseif ($autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // --- conversions covered by the library's own helpers

        if (
            $toEncoding === 'UTF-8'
            &&
            ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'ISO-8859-1')
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'UTF-8')
        ) {
            return self::to_iso8859($str);
        }

        // --- everything else: mbstring first, iconv second

        if (
            $toEncoding !== 'UTF-8'
            &&
            $toEncoding !== 'ISO-8859-1'
            &&
            $toEncoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $strEncoded = \mb_convert_encoding(
                $str,
                $toEncoding,
                $fromEncoding
            );

            if ($strEncoded) {
                return $strEncoded;
            }
        }

        $return = \iconv($fromEncoding, $toEncoding, $str);
        if ($return !== false) {
            return $return;
        }

        // Last resort: hand the input back unchanged.
        return $str;
    }
1131
1132
    /**
     * Encodes a string as a MIME header field value via iconv_mime_encode(),
     * using an empty field name.
     *
     * @param string $str              <p>The header value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
     * @param int    $indent           [optional] <p>Set the max line length (passed as "line-length").</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as given, every other name is normalized.
        if (!($fromCharset === 'UTF-8' || $fromCharset === 'CP850')) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (!($toCharset === 'UTF-8' || $toCharset === 'CP850')) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1172
1173
    /**
     * Create an extract from a sentence; if the search-string is found, the extract is
     * positioned so that the match is roughly centered in the output.
     *
     * Cut positions are looked up at the next ' ' / '.' so that words are not split, and
     * skipped text is marked with $replacerForSkippedText.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string. An empty string means
     *                                         "no search": the extract is taken from the start.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round(self::strlen($str, $encoding) / 2, 0);
        }

        // Strict comparison on purpose: empty() would also treat the
        // search string "0" as "no search".
        if ($search === '') {
            $stringLength = self::strlen($str, $encoding);

            if ($length > 0) {
                $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
            } else {
                $end = 0;
            }

            // NOTE(review): if self::strpos() reports "not found" as false, \min() picks that
            // false, so $pos is 0 unless both ' ' and '.' occur after $end - confirm intended.
            $pos = (int) \min(
                self::strpos($str, ' ', $end, $encoding),
                self::strpos($str, '.', $end, $encoding)
            );

            if ($pos) {
                $strSub = self::substr($str, 0, $pos, $encoding);
                if ($strSub === false) {
                    return '';
                }

                return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
            }

            return $str;
        }

        $wordPos = self::stripos($str, $search, 0, $encoding);
        // Start of a window of $length chars whose middle is the middle of the match.
        $halfSide = (int) ($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = self::substr($str, 0, $halfSide, $encoding);
            if ($halfText !== false) {
                // Move the start back to the last ' ' / '.' before the window.
                $pos_start = (int) \max(
                    self::strrpos($halfText, ' ', 0, $encoding),
                    self::strrpos($halfText, '.', 0, $encoding)
                );
            }
        }

        if ($wordPos && $halfSide > 0) {
            // The match lies far enough into the text: cut at the front (and maybe at the end).
            $l = $pos_start + $length - 1;
            $realLength = self::strlen($str, $encoding);

            if ($l > $realLength) {
                $l = $realLength;
            }

            $pos_end = (int) \min(
                    self::strpos($str, ' ', $l, $encoding),
                    self::strpos($str, '.', $l, $encoding)
                ) - $pos_start;

            if (!$pos_end || $pos_end <= 0) {
                // No usable end position: take everything from $pos_start to the end.
                $strSub = self::substr($str, $pos_start, self::strlen($str, $encoding), $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
                } else {
                    $extract = '';
                }
            } else {
                $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            }
        } else {
            // Match at (or near) the beginning, or no match: extract from the start of the text.
            $l = $length - 1;
            $trueLength = self::strlen($str, $encoding);

            if ($l > $trueLength) {
                $l = $trueLength;
            }

            $pos_end = \min(
                self::strpos($str, ' ', $l, $encoding),
                self::strpos($str, '.', $l, $encoding)
            );

            if ($pos_end) {
                $strSub = self::substr($str, 0, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1291
1292
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. It is passed through
     *                                        filter_var(..., FILTER_SANITIZE_STRING) before use.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded as the second argument of \file_get_contents()
     *                                        (search the include path).
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is non-zero,
     *                                        a context with an "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout; only used when
     *                                        no $context is given.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        An empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // init
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            // The filter rejected the name; report failure the documented way
            // instead of handing "false" to \file_get_contents().
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        if (\is_int($maxLength) === true) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 === true) {
            // Binary data is left alone, unless it is UTF-16 / UTF-32 text.
            $isPlainBinary = self::is_binary($data, true) === true
                             &&
                             self::is_utf16($data, false) === false
                             &&
                             self::is_utf32($data, false) === false;

            if (!$isPlainBinary) {
                $data = self::encode('UTF-8', $data, false, $fromEncoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1385
1386
    /**
     * Checks if a file starts with a BOM (Byte Order Mark).
     *
     * The whole file is read and handed to UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1405
1406
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively, element by element / property by
     * property (an object is modified in place). Values that are neither array, object
     * nor string are returned untouched.
     *
     * @param mixed  $var                <p>The value to normalize.</p>
     * @param int    $normalization_form <p>Form passed to \Normalizer (default 4, i.e. NFC).</p>
     * @param string $leading_combining  <p>Prefix put in front of a string that would otherwise
     *                                   start with a combining mark.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
    {
        if (\is_array($var)) {
            foreach ($var as $key => $value) {
                /** @noinspection AlterInForeachInspection */
                $var[$key] = self::filter($value, $normalization_form, $leading_combining);
            }

            return $var;
        }

        if (\is_object($var)) {
            foreach ($var as $key => $value) {
                $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
            }

            return $var;
        }

        if (!\is_string($var)) {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization at all.
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        /** @noinspection PhpUndefinedClassInspection */
        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Non-empty marker: "normalization is fine", used by the isset() check below.
            $n = '-';
        } else {
            /** @noinspection PhpUndefinedClassInspection */
            $n = \Normalizer::normalize($var, $normalization_form);

            // A non-empty result is taken over; otherwise try to repair the encoding instead.
            $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
        }

        if (
            $var[0] >= "\x80"
            &&
            isset($n[0], $leading_combining[0])
            &&
            \preg_match('/^\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1471
1472
    /**
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and passes
     * the result through UTF8::filter().
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              Only forwarded when the caller actually passes it.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(int $type, string $variable_name, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Forward $options only if it was given explicitly.
        $value = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($value);
    }
1510
1511
    /**
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the result
     * through UTF8::filter().
     *
     * @see  http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" which specifies the filter type,
     *                          "flags" which specifies any flags that apply to the
     *                          filter, and "options" which specifies any options that
     *                          apply to the filter.
     *                          </p>
     *                          <p>
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
     *                          input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With only $type given, call the native function without a definition.
        $values = \func_num_args() < 2
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($values);
    }
1556
1557
    /**
1558
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1559
     *
1560
     * Filters a variable with a specified filter
1561
     *
1562
     * @see  http://php.net/manual/en/function.filter-var.php
1563
     *
1564
     * @param mixed $variable <p>
1565
     *                        Value to filter.
1566
     *                        </p>
1567
     * @param int   $filter   [optional] <p>
1568
     *                        The ID of the filter to apply. The
1569
     *                        manual page lists the available filters.
1570
     *                        </p>
1571
     * @param mixed $options  [optional] <p>
1572
     *                        Associative array of options or bitwise disjunction of flags. If filter
1573
     *                        accepts options, flags can be provided in "flags" field of array. For
1574
     *                        the "callback" filter, callable type should be passed. The
1575
     *                        callback must accept one argument, the value to be filtered, and return
1576
     *                        the value after filtering/sanitizing it.
1577
     *                        </p>
1578
     *                        <p>
1579
     *                        <code>
1580
     *                        // for filters that accept options, use this format
1581
     *                        $options = array(
1582
     *                        'options' => array(
1583
     *                        'default' => 3, // value to return if the filter fails
1584
     *                        // other options here
1585
     *                        'min_range' => 0
1586
     *                        ),
1587
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1588
     *                        );
1589
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1590
     *                        // for filter that only accept flags, you can pass them directly
1591
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1592
     *                        // for filter that only accept flags, you can also pass as an array
1593
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1594
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1595
     *                        // callback validate filter
1596
     *                        function foo($value)
1597
     *                        {
1598
     *                        // Expected format: Surname, GivenNames
1599
     *                        if (strpos($value, ", ") === false) return false;
1600
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1601
     *                        $empty = (empty($surname) || empty($givennames));
1602
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1603
     *                        if ($empty || $notstrings) {
1604
     *                        return false;
1605
     *                        } else {
1606
     *                        return $value;
1607
     *                        }
1608
     *                        }
1609
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1610
     *                        </code>
1611
     *                        </p>
1612
     *
1613
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1614
     */
1615 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // NULL is this wrapper's own "not given" marker. It must never be forwarded:
        // the native function does not accept NULL for its third argument (TypeError
        // under strict types on PHP 8), so an explicit NULL is treated exactly like
        // an omitted argument.
        if ($options === null) {
            $filtered = \filter_var($variable, $filter);
        } else {
            $filtered = \filter_var($variable, $filter, $options);
        }

        // Post-process the native result via "UTF8::filter()".
        return self::filter($filtered);
    }
1625
1626
    /**
1627
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1628
     *
1629
     * Gets multiple variables and optionally filters them
1630
     *
1631
     * @see  http://php.net/manual/en/function.filter-var-array.php
1632
     *
1633
     * @param array $data       <p>
1634
     *                          An array with string keys containing the data to filter.
1635
     *                          </p>
1636
     * @param mixed $definition [optional] <p>
1637
     *                          An array defining the arguments. A valid key is a string
1638
     *                          containing a variable name and a valid value is either a
1639
     *                          filter type, or an
1640
     *                          array optionally specifying the filter, flags and options.
1641
     *                          If the value is an array, valid keys are filter
1642
     *                          which specifies the filter type,
1643
     *                          flags which specifies any flags that apply to the
1644
     *                          filter, and options which specifies any options that
1645
     *                          apply to the filter. See the example below for a better understanding.
1646
     *                          </p>
1647
     *                          <p>
1648
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1649
     *                          input array are filtered by this filter.
1650
     *                          </p>
1651
     * @param bool  $add_empty  [optional] <p>
1652
     *                          Add missing keys as <b>NULL</b> to the return value.
1653
     *                          </p>
1654
     *
1655
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1656
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1657
     *               set
1658
     */
1659 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // NULL is this wrapper's own "not given" marker for $definition. The native
        // function only accepts an array or a filter id, so NULL is mapped to the
        // native default (FILTER_DEFAULT). This also keeps an explicitly passed
        // $add_empty in effect when the caller leaves $definition as NULL.
        $filtered = \filter_var_array($data, $definition ?? \FILTER_DEFAULT, $add_empty);

        // Post-process the native result via "UTF8::filter()".
        return self::filter($filtered);
    }
1669
1670
    /**
1671
     * Checks whether finfo is available on the server.
1672
     *
1673
     * @return bool
1674
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1675
     */
1676
    public static function finfo_loaded(): bool
    {
        // The check is simply whether the "finfo" class can be resolved.
        $isAvailable = \class_exists('finfo');

        return $isAvailable;
    }
1680
1681
    /**
1682
     * Returns the first $n characters of the string.
1683
     *
1684
     * @param string $str      <p>The input string.</p>
1685
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1686
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1687
     *
1688
     * @return string
1689
     */
1690 13
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // Nothing to extract for a non-positive count.
        if ($n <= 0) {
            return '';
        }

        $prefix = self::substr($str, 0, $n, $encoding);

        // "UTF8::substr()" may report failure with FALSE; callers always get a string.
        return $prefix === false ? '' : $prefix;
    }
1703
1704
    /**
1705
     * Check if the number of unicode characters are not more than the specified integer.
1706
     *
1707
     * @param string $str      the original string to be checked
1708
     * @param int    $box_size the size in number of chars to be checked against string
1709
     *
1710
     * @return bool true if string is less than or equal to $box_size, false otherwise
1711
     */
1712 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by "UTF8::strlen()", compared against the box size.
        $length = self::strlen($str);

        return $length <= $box_size;
    }
1716
1717
    /**
1718
     * @param string $str
1719
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
1720
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
1721
     *
1722
     * @return string
1723
     */
1724 54
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        // Only a literal TRUE selects the "to lower" direction.
        $toLower = ($useLower === true);

        $commonUpper = self::$COMMON_CASE_FOLD['upper'];
        $commonLower = self::$COMMON_CASE_FOLD['lower'];

        // Step 1: apply the common case-folding table in the requested direction.
        $str = $toLower
            ? (string) \str_replace($commonUpper, $commonLower, $str)
            : (string) \str_replace($commonLower, $commonUpper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // Step 2: apply the full case-folding table, loaded once per request.
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        if ($toLower) {
            return (string) \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return (string) \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
1758
1759
    /**
1760
     * Try to fix simple broken UTF-8 strings.
1761
     *
1762
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1763
     *
1764
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1765
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1766
     * See: http://en.wikipedia.org/wiki/Windows-1252
1767
     *
1768
     * @param string $str <p>The input string</p>
1769
     *
1770
     * @return string
1771
     */
1772 42
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Search / replace lists derived from the "utf8_fix" data file, built once.
        static $brokenSequences = null;
        static $fixedSequences = null;

        if ($brokenSequences === null) {
            // Load the mapping lazily; other code may already have populated it.
            self::$BROKEN_UTF8_FIX = self::$BROKEN_UTF8_FIX ?? self::getData('utf8_fix');

            $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
            $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($brokenSequences, $fixedSequences, $str);
    }
1792
1793
    /**
1794
     * Fix a double (or multiple) encoded UTF8 string.
1795
     *
1796
     * @param string|string[] $str you can use a string or an array of strings
1797
     *
1798
     * @return string|string[]
1799
     *                          Will return the fixed input-"array" or
1800
     *                          the fixed input-"string"
1801
     */
1802 2
    public static function fix_utf8($str)
    {
        // Arrays are fixed element by element (recursively); keys are preserved.
        if (\is_array($str) === true) {
            return \array_map(
                static function ($item) {
                    return self::fix_utf8($item);
                },
                $str
            );
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        // Decode + re-encode until the string no longer changes.
        do {
            $previous = $str;
            $str = self::to_utf8(
                self::utf8_decode($str, true)
            );
        } while ($previous !== $str);

        return $str;
    }
1823
1824
    /**
1825
     * Get the text direction of a specific character.
1826
     *
1827
     * @param string $char
1828
     *
1829
     * @return string 'RTL' or 'LTR'
1830
     */
1831 2
    public static function getCharDirection(string $char): string
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Preferred path: ask "IntlChar" and map its direction code.
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $icuDirection = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            if (\in_array($icuDirection, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($icuDirection, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // Any other code falls through to the manual lookup below.
        }

        $c = static::chr_to_decimal($char);

        // Everything outside of this window is treated as left-to-right.
        if ($c < 0x5be || $c > 0x10b7f) {
            return 'LTR';
        }

        // Single code points that are right-to-left (compared strictly).
        static $rtlSingles = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtlSingles, true)) {
            return 'RTL';
        }

        // Inclusive [first, last] ranges of right-to-left code points.
        static $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
1941
1942
    /**
1943
     * get data from "/data/*.php"
1944
     *
1945
     * @param string $file
1946
     *
1947
     * @return mixed
1948
     */
1949 5
    private static function getData(string $file)
    {
        // Resolves to "<this dir>/data/<file>.php"; the value returned by that file is handed back.
        /** @noinspection PhpIncludeInspection */
        return include \sprintf('%s/data/%s.php', __DIR__, $file);
    }
1954
1955
    /**
1956
     * get data from "/data/*.php"
1957
     *
1958
     * @param string $file
1959
     *
1960
     * @return false|mixed will return false on error
1961
     */
1962 9
    private static function getDataIfExists(string $file)
    {
        // Expand the short name to "<this dir>/data/<file>.php".
        $file = __DIR__ . '/data/' . $file . '.php';

        // A missing data file is reported with FALSE.
        if (!\file_exists($file)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        return include $file;
    }
1972
1973
    /**
1974
     * Check for php-support.
1975
     *
1976
     * @param string|null $key
1977
     *
1978
     * @return mixed
1979
     *               Return the full support-"array", if $key === null<br>
1980
     *               return bool-value, if $key is used and available<br>
1981
     *               otherwise return <strong>null</strong>
1982
     */
1983 26
    public static function getSupportInfo(string $key = null)
    {
        // Make sure the support table has been populated.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Without a key the complete table is returned; an unknown key yields NULL.
        return $key === null
            ? self::$SUPPORT
            : (self::$SUPPORT[$key] ?? null);
    }
1999
2000
    /**
2001
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
2002
     *          if you need more supported types, please use e.g. "finfo"
2003
     *
2004
     * @param string $str
2005
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
2006
     *
2007
     * @return array
2008
     *               with this keys: 'ext', 'mime', 'type'
2009
     */
2010 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // Only the first two bytes are inspected.
        $signature = self::substr_in_byte($str, 0, 2);
        if (self::strlen_in_byte($signature) !== 2) {
            return $fallback;
        }

        // The raw 2-byte signature is compared directly. Gluing the two decimal
        // byte values together (e.g. 71 . 73 => 7173) is ambiguous: the bytes
        // 0x07 0xAD (7 . 173) produce the same number and would be reported as GIF.
        static $signatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xff\xd8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        if (!isset($signatures[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $signatures[$signature][0],
            'mime' => $signatures[$signature][1],
            'type' => 'binary',
        ];
    }
2098
2099
    /**
2100
     * @param int    $length        <p>Length of the random string.</p>
2101
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2102
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2103
     *
2104
     * @return string
2105
     */
2106 1
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        $alphabetSize = self::strlen($possibleChars, $encoding);

        // No characters to choose from.
        if ($alphabetSize === 0) {
            return '';
        }

        $result = '';
        for ($picked = 0; $picked < $length; ++$picked) {
            try {
                $index = \random_int(0, $alphabetSize - 1);
            } catch (\Exception $e) {
                // "random_int()" failed; fall back to "mt_rand()".
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $alphabetSize - 1);
            }

            // Append the character at the randomly chosen position.
            $result .= self::substr($possibleChars, $index, 1, $encoding);
        }

        return $result;
    }
2132
2133
    /**
2134
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2135
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2136
     *
2137
     * @return string
2138
     */
2139 1
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // Seed material: random number, session id, remote / server address, caller supplied extra.
        $seed = \mt_rand() . \session_id();
        $seed .= ($_SERVER['REMOTE_ADDR'] ?? '');
        $seed .= ($_SERVER['SERVER_ADDR'] ?? '');
        $seed .= $entropyExtra;

        $unique = \uniqid($seed, true);

        // Optionally condense the identifier into an md5-hash.
        return $md5 ? \md5($unique . $seed) : $unique;
    }
2155
2156
    /**
2157
     * alias for "UTF8::string_has_bom()"
2158
     *
2159
     * @see        UTF8::string_has_bom()
2160
     *
2161
     * @param string $str
2162
     *
2163
     * @return bool
2164
     *
2165
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
2166
     */
2167 2
    public static function hasBom(string $str): bool
    {
        // Deprecated alias: the work is done by "UTF8::string_has_bom()".
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2171
2172
    /**
2173
     * Returns true if the string contains a lower case char, false otherwise.
2174
     *
2175
     * @param string $str <p>The input string.</p>
2176
     *
2177
     * @return bool whether or not the string contains a lower case character
2178
     */
2179 47
    public static function has_lowercase(string $str): bool
    {
        // Anything, followed by one character of the POSIX "lower" class.
        $pattern = '.*[[:lower:]]';

        return self::str_matches_pattern($str, $pattern);
    }
2183
2184
    /**
2185
     * Returns true if the string contains an upper case char, false otherwise.
2186
     *
2187
     * @param string $str <p>The input string.</p>
2188
     *
2189
     * @return bool whether or not the string contains an upper case character
2190
     */
2191 12
    public static function has_uppercase(string $str): bool
    {
        // Anything, followed by one character of the POSIX "upper" class.
        $pattern = '.*[[:upper:]]';

        return self::str_matches_pattern($str, $pattern);
    }
2195
2196
    /**
2197
     * Converts a hexadecimal-value into an UTF-8 character.
2198
     *
2199
     * @param string $hexdec <p>The hexadecimal value.</p>
2200
     *
2201
     * @return false|string one single UTF-8 character
2202
     */
2203 4
    public static function hex_to_chr(string $hexdec)
    {
        // Hex string => number, then number => character.
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2207
2208
    /**
2209
     * Converts hexadecimal U+xxxx code point representation to integer.
2210
     *
2211
     * INFO: opposite to UTF8::int_to_hex()
2212
     *
2213
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
2214
     *
2215
     * @return false|int the code point, or false on failure
2216
     */
2217 2
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Accepted forms: "\uXXXX", "U+XXXX" or bare "XXXX" with 4 to 6 digits.
        // Only real hexadecimal digits are allowed: with "[a-z0-9]" an input such
        // as "zzzz" matched and was silently converted to 0 instead of being
        // reported as a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2232
2233
    /**
2234
     * alias for "UTF8::html_entity_decode()"
2235
     *
2236
     * @see UTF8::html_entity_decode()
2237
     *
2238
     * @param string $str
2239
     * @param int    $flags
2240
     * @param string $encoding
2241
     *
2242
     * @return string
2243
     */
2244 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Pure alias: every argument is forwarded unchanged.
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2248
2249
    /**
2250
     * Converts a UTF-8 string to a series of HTML numbered entities.
2251
     *
2252
     * INFO: opposite to UTF8::html_decode()
2253
     *
2254
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2255
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2256
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2257
     *
2258
     * @return string HTML numbered entities
2259
     */
2260 13
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Encode everything by default; start at 0x80 to leave ASCII untouched.
            $startCode = 0x00;
            if ($keepAsciiChars === true) {
                $startCode = 0x80;
            }

            // The conversion map consists of groups of exactly four integers
            // (start, end, offset, mask). A trailing fifth element was ignored by
            // PHP 7 but makes PHP 8 throw a ValueError, so it must not be present.
            return \mb_encode_numericentity(
                $str,
                [$startCode, 0xfffff, 0, 0xfffff],
                $encoding
            );
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                function ($chr) use ($keepAsciiChars, $encoding) {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::split($str)
            )
        );
    }
2303
    /**
2304
     * UTF-8 version of html_entity_decode()
2305
     *
2306
     * The reason we are not using html_entity_decode() by itself is because
2307
     * while it is not technically correct to leave out the semicolon
2308
     * at the end of an entity most browsers will still interpret the entity
2309
     * correctly. html_entity_decode() does not convert entities without
2310
     * semicolons, so we are left with our own little solution here. Bummer.
2311
     *
2312
     * Convert all HTML entities to their applicable characters
2313
     *
2314
     * INFO: opposite to UTF8::html_encode()
2315
     *
2316
     * @see http://php.net/manual/en/function.html-entity-decode.php
2317
     *
2318
     * @param string $str      <p>
2319
     *                         The input string.
2320
     *                         </p>
2321
     * @param int    $flags    [optional] <p>
2322
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2323
     *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2324
     *                         <table>
2325
     *                         Available <i>flags</i> constants
2326
     *                         <tr valign="top">
2327
     *                         <td>Constant Name</td>
2328
     *                         <td>Description</td>
2329
     *                         </tr>
2330
     *                         <tr valign="top">
2331
     *                         <td><b>ENT_COMPAT</b></td>
2332
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2333
     *                         </tr>
2334
     *                         <tr valign="top">
2335
     *                         <td><b>ENT_QUOTES</b></td>
2336
     *                         <td>Will convert both double and single quotes.</td>
2337
     *                         </tr>
2338
     *                         <tr valign="top">
2339
     *                         <td><b>ENT_NOQUOTES</b></td>
2340
     *                         <td>Will leave both double and single quotes unconverted.</td>
2341
     *                         </tr>
2342
     *                         <tr valign="top">
2343
     *                         <td><b>ENT_HTML401</b></td>
2344
     *                         <td>
2345
     *                         Handle code as HTML 4.01.
2346
     *                         </td>
2347
     *                         </tr>
2348
     *                         <tr valign="top">
2349
     *                         <td><b>ENT_XML1</b></td>
2350
     *                         <td>
2351
     *                         Handle code as XML 1.
2352
     *                         </td>
2353
     *                         </tr>
2354
     *                         <tr valign="top">
2355
     *                         <td><b>ENT_XHTML</b></td>
2356
     *                         <td>
2357
     *                         Handle code as XHTML.
2358
     *                         </td>
2359
     *                         </tr>
2360
     *                         <tr valign="top">
2361
     *                         <td><b>ENT_HTML5</b></td>
2362
     *                         <td>
2363
     *                         Handle code as HTML 5.
2364
     *                         </td>
2365
     *                         </tr>
2366
     *                         </table>
2367
     *                         </p>
2368
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2369
     *
2370
     * @return string the decoded string
2371
     */
2372 40
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Shorter than four bytes: cannot hold a decodable entity.
        if (!isset($str[3])) { // examples: &; || &x;
            return $str;
        }

        // Quick exit when the string cannot contain any entity at all.
        if (
            \strpos($str, '&') === false
            ||
            (
                \strpos($str, '&#') === false
                &&
                \strpos($str, ';') === false
            )
        ) {
            return $str;
        }

        // The support table must be populated BEFORE it is read below; otherwise
        // the "mbstring" lookup hits an undefined index and the warning is never raised.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Repeat until a pass changes nothing, so multiply-encoded entities are resolved too.
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                // The conversion map consists of groups of exactly four integers
                // (start, end, offset, mask); a fifth element makes PHP 8 throw.
                $str = \mb_decode_numericentity(
                    $str,
                    [0x80, 0xfffff, 0, 0xfffff],
                    $encoding
                );
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    function ($matches) use ($encoding) {
                        // always fallback via symfony polyfill
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                        // Quotes stay encoded here; they are handled according to $flags below.
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            // Add the missing semicolon to numeric entities so that the native decoder accepts them.
            $withSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
            if ($withSemicolons === null) {
                // PCRE failure: continue with the unmodified string instead of passing NULL on.
                $withSemicolons = $str;
            }

            // decode numeric & UTF16 two byte entities
            $str = \html_entity_decode(
                $withSemicolons,
                $flags,
                $encoding
            );
        } while ($str_compare !== $str);

        return $str;
    }
2455
2456
    /**
     * Escape a string for HTML output by delegating to "UTF8::htmlspecialchars()".
     *
     * The call always uses ENT_QUOTES | ENT_SUBSTITUTE as flags.
     *
     * @param string $str      <p>The string to escape.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2472
2473
    /**
     * Remove empty html-tags, i.e. an opening tag that is directly followed
     * (optionally separated by whitespace only) by a closing tag.
     *
     * e.g.: <tag></tag>
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The string without empty tags; the unmodified input if the
     *                regular expression engine fails (e.g. on malformed UTF-8).</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            '/<[^\/>]*>\s*<\/[^>]*>/iu',
            '',
            $str
        );

        // preg_replace() returns null on error; do not silently turn the input into ''
        return $stripped ?? $str;
    }
2490
2491
    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
     *
     * On top of the native function, every backslash of the result is replaced by
     * "&#92;" and the string is finally passed through "UTF8::html_encode()".
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str           <p>The input string.</p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to
     *                              handle quotes, invalid code unit sequences and the used document type.
     *                              Default: ENT_COMPAT.
     *                              </p>
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: convert double-quotes, leave single-quotes alone.</li>
     *                              <li><b>ENT_QUOTES</b>: convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: leave both double and single quotes unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: silently discard invalid code unit sequences instead
     *                              of returning an empty string (discouraged, may have security implications).</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: replace invalid code points for the given document
     *                              type with U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of leaving them
     *                              as is, e.g. to ensure the well-formedness of XML documents with embedded
     *                              external content.</li>
     *                              <li><b>ENT_HTML401</b>: handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: handle code as HTML 5.</li>
     *                              </ul>
     * @param string $encoding      [optional] <p>
     *                              The encoding used in the conversion. Although this argument is
     *                              technically optional, you are highly encouraged to specify the
     *                              correct value for your code.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When turned off, existing html entities are not encoded again.
     *                              The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>The encoded string. If the input contains an invalid code unit sequence
     *                within the given encoding, an empty string is returned unless either the
     *                <b>ENT_IGNORE</b> or <b>ENT_SUBSTITUTE</b> flag is set.</p>
     */
    public static function htmlentities(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        $isFixedEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isFixedEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $entities = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP does not convert a backslash to its html entity, because backslashes are
         * mostly used to escape characters before inserting them into a database.
         * That is not needed with a decent database layer, so the backslash is
         * replaced by its html entity equivalent here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $withoutBackslashes = \str_replace('\\', '&#92;', $entities);

        return self::html_encode($withoutBackslashes, true, $encoding);
    }
2613
2614
    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str           <p>The string being converted.</p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to
     *                              handle quotes, invalid code unit sequences and the used document type.
     *                              Default: ENT_COMPAT.
     *                              </p>
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: convert double-quotes, leave single-quotes alone.</li>
     *                              <li><b>ENT_QUOTES</b>: convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: leave both double and single quotes unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: silently discard invalid code unit sequences instead
     *                              of returning an empty string (discouraged, may have security implications).</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: replace invalid code points for the given document
     *                              type with U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of leaving them
     *                              as is, e.g. to ensure the well-formedness of XML documents with embedded
     *                              external content.</li>
     *                              <li><b>ENT_HTML401</b>: handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: handle code as HTML 5.</li>
     *                              </ul>
     * @param string $encoding      [optional] <p>
     *                              Defines the encoding used in the conversion.
     *                              </p>
     *                              <p>
     *                              For the purposes of this function, the encodings ISO-8859-1, ISO-8859-15,
     *                              UTF-8, cp866, cp1251, cp1252, and KOI8-R are effectively equivalent,
     *                              provided the string itself is valid for the encoding, as the characters
     *                              affected by <b>htmlspecialchars</b> occupy the same positions in all of
     *                              these encodings.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When turned off, existing html entities are not encoded again.
     *                              The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>The converted string. If the input contains an invalid code unit sequence
     *                within the given encoding, an empty string is returned unless either the
     *                <b>ENT_IGNORE</b> or <b>ENT_SUBSTITUTE</b> flag is set.</p>
     */
    public static function htmlspecialchars(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        $isFixedEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isFixedEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2731
2732
    /**
     * Checks whether iconv is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        // extension_loaded() already returns a bool, no ternary needed
        return \extension_loaded('iconv');
    }
2742
2743
    /**
     * Alias of "UTF8::decimal_to_chr()": the argument is forwarded unchanged.
     *
     * @see UTF8::decimal_to_chr()
     *
     * @param mixed $int
     *
     * @return string
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2756
2757
    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>Prefix that is put in front of the hex digits.</p>
     *
     * @return string
     *                <p>The prefix followed by the lower-case hexadecimal value,
     *                left-padded with zeros to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // pad short values with leading zeros, longer ones are kept as they are
        $hexDigits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $hexDigits;
    }
2775
2776
    /**
     * Checks whether intl-char is available on the server,
     * i.e. whether the class "IntlChar" exists.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2786
2787
    /**
     * Checks whether the "intl" extension is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $hasIntl = \extension_loaded('intl');

        return $hasIntl;
    }
2797
2798
    /**
     * Deprecated alias of "UTF8::is_ascii()".
     *
     * @see        UTF8::is_ascii()
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $isAscii = self::is_ascii($str);

        return $isAscii;
    }
2813
2814
    /**
     * Deprecated alias of "UTF8::is_base64()".
     *
     * @see        UTF8::is_base64()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $isBase64 = self::is_base64($str);

        return $isBase64;
    }
2829
2830
    /**
     * Deprecated alias of "UTF8::is_binary()".
     *
     * @see        UTF8::is_binary()
     *
     * @param mixed $str    <p>The input to check.</p>
     * @param bool  $strict <p>Forwarded unchanged to "UTF8::is_binary()".</p>
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $isBinary = self::is_binary($str, $strict);

        return $isBinary;
    }
2846
2847
    /**
     * Deprecated alias of "UTF8::is_bom()".
     *
     * @see        UTF8::is_bom()
     *
     * @param string $utf8_chr <p>The input string.</p>
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $isBom = self::is_bom($utf8_chr);

        return $isBom;
    }
2862
2863
    /**
     * Deprecated alias of "UTF8::is_html()".
     *
     * @see        UTF8::is_html()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $isHtml = self::is_html($str);

        return $isHtml;
    }
2878
2879
    /**
     * Deprecated alias of "UTF8::is_json()".
     *
     * @see        UTF8::is_json()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $isJson = self::is_json($str);

        return $isJson;
    }
2894
2895
    /**
     * Deprecated alias of "UTF8::is_utf16()".
     *
     * @see        UTF8::is_utf16()
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $utf16Type = self::is_utf16($str);

        return $utf16Type;
    }
2913
2914
    /**
     * Deprecated alias of "UTF8::is_utf32()".
     *
     * @see        UTF8::is_utf32()
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $utf32Type = self::is_utf32($str);

        return $utf32Type;
    }
2932
2933
    /**
     * Deprecated alias of "UTF8::is_utf8()".
     *
     * @see        UTF8::is_utf8()
     *
     * @param string $str    <p>The input string.</p>
     * @param bool   $strict <p>Forwarded unchanged to "UTF8::is_utf8()".</p>
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $isUtf8 = self::is_utf8($str, $strict);

        return $isUtf8;
    }
2949
2950
    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * POSIX class [[:alpha:]].
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *               Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
2962
2963
    /**
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * POSIX class [[:alnum:]].
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *               Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
2975
2976
    /**
     * Checks if a string is 7 bit ASCII.
     *
     * Accepted bytes are 0x09, 0x0A, 0x0D, 0x10, 0x13 and the printable
     * range 0x20-0x7E; any other byte makes the check fail.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        // NOTE(review): "\x10" and "\x13" look like decimal 10 / 13 (LF / CR) written as hex - confirm the intent.
        return $str === ''
               ||
               !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
2993
2994
    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * The input is decoded in strict mode and encoded again; it only counts as
     * base64 if that round trip reproduces the input exactly.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded
     *              (always false for non-strings and for the empty string).</p>
     */
    public static function is_base64($str): bool
    {
        if (!\is_string($str) || $str === '') {
            return false;
        }

        $decoded = \base64_decode($str, true);

        // Compare with false explicitly: a truthiness check would reject valid
        // payloads such as "MA==", whose decoded value is the falsy string "0".
        return $decoded !== false
               &&
               \base64_encode($decoded) === $str;
    }
3015
3016
    /**
     * Check if the input is binary... (is look like a hack).
     *
     * The input is cast to string and counts as binary if one of these checks hits:
     * it consists solely of "0" / "1" characters, "UTF8::get_file_type()" reports
     * the type "binary", more than 25.6 % of its bytes are NUL bytes, or - in
     * strict mode only - finfo reports the mime encoding "binary".
     *
     * @param mixed $input  <p>The value to check.</p>
     * @param bool  $strict <p>Additionally ask finfo (needs ext-fileinfo).</p>
     *
     * @throws \RuntimeException if $strict is true and ext-fileinfo is not installed
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        $fileType = self::get_file_type($data);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        $byteLength = self::strlen_in_byte($data);
        if ($byteLength) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            // ratio of NUL bytes in the whole input
            $nullByteCount = self::substr_count_in_byte($data, "\x0", 0, $byteLength);
            if (($nullByteCount / $byteLength) > 0.256) {
                return true;
            }
        }

        if (!$strict) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncodingDetector = new \finfo(\FILEINFO_MIME_ENCODING);

        return $mimeEncodingDetector->buffer($data) === 'binary';
    }
3071
3072
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and handed to
     * "UTF8::is_binary()" in strict mode. A file that cannot be opened
     * or that is empty is reported as not binary.
     *
     * @param string $file <p>Path of the file to inspect.</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $head = '';

        $handle = \fopen($file, 'rb');
        if (\is_resource($handle)) {
            $head = \fread($handle, 512);
            \fclose($handle);
        }

        return $head !== '' && self::is_binary($head, true);
    }
3096
3097
    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * POSIX class [[:space:]].
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *               Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3109
3110
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is identical to one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // the BOM strings are the keys of the lookup table; compare strictly against each of them
        $knownBoms = \array_keys(self::$BOM);

        return \in_array($str, $knownBoms, true);
    }
3130
3131
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it equals FALSE when converted to bool
     * (e.g. null, '', '0', 0, an empty array).
     *
     * @param mixed $str <p>The value to check.</p>
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // the parameter always exists, so empty() boils down to a falsiness check
        return !$str;
    }
3145
3146
    /**
     * Returns true if the string contains only hexadecimal chars, false otherwise.
     *
     * The check is delegated to "UTF8::str_matches_pattern()" with the
     * POSIX class [[:xdigit:]].
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *               Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3158
3159
    /**
     * Check if the string contains any html-tags <lall>.
     *
     * A single opening, closing or self-closing tag (with or without attributes)
     * anywhere in the string is enough.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $tagFound = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

        // 1 => a tag matched; 0 (no match) and false (regex error) both mean "no html"
        return $tagFound === 1;
    }
3179
3180
    /**
     * Try to check if "$str" is an json-string.
     *
     * The string only counts as JSON if "UTF8::json_decode()" turns it into an
     * object or an array and no JSON error was recorded.
     *
     * @param string $str <p>The input string.</p>
     *
     * @throws \RuntimeException if ext-json is not installed
     *
     * @return bool
     */
    public static function is_json(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // scalars and null are not accepted
        if (!\is_object($decoded) && !\is_array($decoded)) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3212
3213
    /**
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * The decision is delegated to UTF8::str_matches_pattern() with the POSIX
     * class "[[:lower:]]", anchored at both ends of the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *               Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        return (bool) self::str_matches_pattern($str, '^[[:lower:]]*$');
    }
3226
3227
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with unserialize(); only the success of the
     * decoding is evaluated, the decoded value itself is discarded.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is serialize(false): unserialize() returns false for it, which
        // cannot be told apart from its failure value, so it is handled up front.
        if ($str === 'b:0;') {
            return true;
        }

        // Object instantiation is disabled: serialized objects are decoded as
        // __PHP_Incomplete_Class, so untrusted input cannot trigger magic
        // methods (e.g. __wakeup() / __destruct()) of real classes.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3246
3247
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * The decision is delegated to UTF8::str_matches_pattern() with the POSIX
     * class "[[:upper:]]", anchored at both ends of the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *               Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        return self::str_matches_pattern($str, '^[[:upper:]]*$');
    }
3260
3261
    /**
     * Check if the string is UTF-16.
     *
     * The input (without BOM) is decoded as UTF-16LE and as UTF-16BE. Every
     * candidate that survives a decode / re-encode / decode round trip gets a
     * score, and the candidate with the higher score wins. Equal scores are
     * reported as "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>When true, input that UTF8::is_binary() does not
     *                                     report as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        $str = (string) $str;

        if ($checkIfStringIsBinary === true && self::is_binary($str, true) === false) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Character statistics of the raw input, computed lazily on first use.
        $strChars = [];
        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$decoded) {
                continue;
            }

            // Only a lossless round trip qualifies the candidate for scoring.
            $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
            $decodedAgain = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
            if ($decodedAgain !== $decoded) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            foreach (self::count_chars($decodedAgain, true) as $char => $unused) {
                if (\in_array($char, $strChars, true) === true) {
                    $scores[$candidate]++;
                }
            }
        }

        if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
            return false;
        }

        return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
    }
3336
3337
    /**
     * Check if the string is UTF-32.
     *
     * The input (without BOM) is decoded as UTF-32LE and as UTF-32BE. Every
     * candidate that survives a decode / re-encode / decode round trip gets a
     * score, and the candidate with the higher score wins. Equal scores are
     * reported as "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>When true, input that UTF8::is_binary() does not
     *                                     report as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        $str = (string) $str;

        if ($checkIfStringIsBinary === true && self::is_binary($str, true) === false) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Character statistics of the raw input, computed lazily on first use.
        $strChars = [];
        $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$decoded) {
                continue;
            }

            // Only a lossless round trip qualifies the candidate for scoring.
            $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
            $decodedAgain = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
            if ($decodedAgain !== $decoded) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            foreach (self::count_chars($decodedAgain, true) as $char => $unused) {
                if (\in_array($char, $strChars, true) === true) {
                    $scores[$candidate]++;
                }
            }
        }

        if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
            return false;
        }

        return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
    }
3412
3413
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * Arrays are valid only if every element is valid. The empty string is valid.
     * Overlong encodings, 5- and 6-octet sequences, surrogates, code points above
     * U+10FFFF and incomplete multi-octet sequences (also at the very end of the
     * input) are rejected.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Work on a real string from here on, so that the byte-wise access
        // below ($str[$i]) and the byte length refer to the same value.
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut is taken when pcre_utf8_support() is NOT
        // true, yet it relies on the /u modifier - confirm that the condition
        // is not inverted.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = self::strlen_in_byte($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; $i++) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } else {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if ((0xC0 & $in) === 0x80) {
                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;
                    // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                    // Unicode code point to be output.
                    if (--$mState === 0) {
                        // Check for illegal sequences and code points.
                        //
                        // From Unicode 3.1, non-shortest form is illegal
                        if (
                            ($mBytes === 2 && $mUcs4 < 0x0080)
                            ||
                            ($mBytes === 3 && $mUcs4 < 0x0800)
                            ||
                            ($mBytes === 4 && $mUcs4 < 0x10000)
                            ||
                            ($mBytes > 4)
                            ||
                            // From Unicode 3.2, surrogate characters are illegal.
                            (($mUcs4 & 0xFFFFF800) === 0xD800)
                            ||
                            // Code points outside the Unicode range are illegal.
                            ($mUcs4 > 0x10FFFF)
                        ) {
                            return false;
                        }
                        // initialize UTF8 cache
                        $mState = 0;
                        $mUcs4 = 0;
                        $mBytes = 1;
                    }
                } else {
                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
                    // Incomplete multi-octet sequence.
                    return false;
                }
            }
        }

        // A non-zero state means that the input ended in the middle of a
        // multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
        return $mState === 0;
    }
3572
3573
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string
     *
     * The input is passed through UTF8::filter() before it is handed over to
     * PHP's native json_decode().
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options. Currently only
     *                        <b>JSON_BIGINT_AS_STRING</b>
     *                        is supported (default is to cast large integers as floats)
     *                        </p>
     *
     * @throws \RuntimeException if the json support flag is false ("ext-json: is not installed")
     *
     * @return mixed
     *                The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *                null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *                <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *                is deeper than the recursion limit.
     */
    public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
    {
        $filteredJson = self::filter($json);

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filteredJson, $assoc, $depth, $options);
    }
3623
3624
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value.
     *
     * The value is passed through UTF8::filter() before it is handed over to
     * PHP's native json_encode().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if the json support flag is false ("ext-json: is not installed")
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filteredValue, $options, $depth);
    }
3677
3678
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is probed by looking for the json_decode() function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $jsonDecodeExists = \function_exists('json_decode');

        return $jsonDecodeExists;
    }
3688
3689
    /**
     * Makes string's first char lowercase.
     *
     * The first character is lower-cased via UTF8::strtolower(); the rest of
     * the string is appended unchanged.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // Everything after the first character; substr() may signal failure with false.
        $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
        if ($tail === false) {
            $tail = '';
        }

        $firstChar = (string) self::substr($str, 0, 1, $encoding, $cleanUtf8);
        $head = self::strtolower($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $head . $tail;
    }
3717
3718
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases.</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length</p>
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3740
3741
    /**
     * Lowercase the first character of all words in the string.
     *
     * The string is split via UTF8::str_to_words(), every part that is not
     * listed in $exceptions is passed through UTF8::lcfirst() and the parts
     * are glued together again.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that must be left untouched.</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
     *                                           start a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a loose falsiness test would also
        // swallow the string "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;
        $newWords = [];

        foreach ($words as $word) {
            // Skip empty parts, but keep the word "0" (falsy in PHP, yet real content).
            if ($word !== '0' && !$word) {
                continue;
            }

            if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }

            $newWords[] = $word;
        }

        return \implode('', $newWords);
    }
3799
3800
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases.</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length</p>
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3822
3823
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * Without an explicit character list, leading characters of the Unicode
     * categories "Z" (separators) and "C" (other, e.g. control characters)
     * are removed.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars <p>Optional characters to be stripped; the character list "0" is honoured,
     *                      any other falsy value selects the default.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is a legitimate character list even though PHP treats it as
        // falsy; every other falsy value still selects the default pattern.
        $useDefaultPattern = $chars === \INF || ($chars !== '0' && !$chars);

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultPattern) {
            $pattern = "^[\pZ\pC]+";
        } else {
            $quotedChars = \preg_quote($chars, '/');
            $pattern = "^[{$quotedChars}]+";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3847
3848
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array is concatenated into one string before its code points are
     * collected via UTF8::codepoints().
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
3870
3871
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character byte sizes come from UTF8::chr_size_list().
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 if no sizes were reported
     */
    public static function max_chr_width(string $str): int
    {
        $byteLengths = self::chr_size_list($str);

        return \count($byteLengths) > 0 ? (int) \max($byteLengths) : 0;
    }
3888
3889
    /**
     * Checks whether mbstring is available on the server.
     *
     * Side effect: when the extension is loaded, the mbstring internal
     * encoding is switched to UTF-8.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        if (\extension_loaded('mbstring') === false) {
            return false;
        }

        \mb_internal_encoding('UTF-8');

        return true;
    }
3905
3906
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * INFO: the INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     *
     * @return bool
     *              <strong>true</strong> if the MB_OVERLOAD_STRING bit is set in
     *              'mbstring.func_overload', <strong>false</strong> otherwise
     *              (also when the constant is not defined at all)
     */
    private static function mbstring_overloaded(): bool
    {
        // Without the constant there is nothing that could be overloaded.
        if (\defined('MB_OVERLOAD_STRING') === false) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadSetting = @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return (bool) ($overloadSetting & \MB_OVERLOAD_STRING);
    }
3923
3924
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array is concatenated into one string before its code points are
     * collected via UTF8::codepoints().
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null if no code points were found
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
3946
3947
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * Both arguments are forwarded unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding <p>The encoding name to normalize.</p>
     * @param mixed $fallback <p>The value handed to normalize_encoding() as fallback.</p>
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
3963
3964
    /**
     * Map an encoding name (in whatever spelling the caller used) to a canonical name.
     *
     * Lookup order: empty-ish input -> $fallback, a few hard-wired names, the per-request
     * cache, the list of known encodings (exact match), and finally an alias table that is
     * keyed by the upper-cased name with punctuation stripped.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // '' and '0' are falsy; '1' / '0' typically come from a casted boolean
        // when the caller does not use "strict_types".
        if (!$encoding || $encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        // fast paths for the most common names
        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // remember the caller's spelling: the cache is keyed by it
        $encodingOrig = $encoding;
        $encoding = \strtoupper($encoding);
        $aliasKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown aliases fall through with the upper-cased input
        if (!empty($equivalences[$aliasKey])) {
            $encoding = $equivalences[$aliasKey];
        }

        $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

        return $encoding;
    }
4105
4106
    /**
     * Convert Windows ("\r\n") and old Mac ("\r") line endings to unix-like "\n".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with "\n" as the only line ending
     */
    public static function normalize_line_ending(string $str): string
    {
        // order matters: "\r\n" must be collapsed before the lone "\r" is handled
        $withoutCrLf = \str_replace("\r\n", "\n", $str);
        $withoutCr = \str_replace("\r", "\n", $withoutCrLf);

        return (string) $withoutCr;
    }
4117
4118
    /**
     * Replace some MS Word special characters, using the "utf8_msword" data table.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        // search / replace lists, built once per request from the data table
        static $UTF8_MSWORD_KEYS_CACHE = null;
        static $UTF8_MSWORD_VALUES_CACHE = null;

        if ($str === '') {
            return '';
        }

        if ($UTF8_MSWORD_KEYS_CACHE === null) {
            // lazy-load the table itself
            if (self::$UTF8_MSWORD === null) {
                self::$UTF8_MSWORD = self::getData('utf8_msword');
            }

            $table = self::$UTF8_MSWORD;
            $UTF8_MSWORD_KEYS_CACHE = \array_keys($table);
            $UTF8_MSWORD_VALUES_CACHE = \array_values($table);
        }

        return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
    }
4145
4146
    /**
     * Replace every entry of the whitespace table with a plain space and (optionally)
     * strip the bidirectional control characters.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        // one search list per value of $keepNonBreakingSpace (index 0 / 1)
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // bidi controls are removed, not replaced by a space
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4187
4188
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Resolution order: per-request cache, the "ord" data table, "IntlChar::ord()" (when
     * available) and finally a manual decode of the leading bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache is keyed by the untouched input plus the (normalized) encoding
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // table hits are returned directly and are not written to $CHAR_CACHE
        if (isset(self::$ORD[$chr])) {
            return self::$ORD[$chr];
        }

        // check again, if it's still not UTF-8
        $converted = $chr;
        if ($encoding !== 'UTF-8') {
            $converted = self::encode($encoding, $chr);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($converted);
            // a falsy result (null / 0) falls through to the manual decode below
            if ($intlCode) {
                $CHAR_CACHE[$cacheKey] = $intlCode;

                return $intlCode;
            }
        }

        // manual decode: look at (up to) the first four bytes; unpack() is 1-indexed
        $bytes = \unpack('C*', (string) self::substr($converted, 0, 4, 'CP850'));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codepoint = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codepoint = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codepoint = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing usable): the lead byte is the result
            $codepoint = $lead;
        }

        $CHAR_CACHE[$cacheKey] = $codepoint;

        return $codepoint;
    }
4265
4266
    /**
     * Parses the string into an array (into the the second parameter).
     *
     * WARNING: Instead of "parse_str()" this method do not (re-)placing variables in the current scope,
     *          if the second parameter is not set!
     *
     * Uses "mb_parse_str()" when mbstring is available, "parse_str()" otherwise.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return !empty($result);
        }

        $parsed = \mb_parse_str($str, $result);

        // success needs both: no parser failure and a non-empty result
        return $parsed !== false && !empty($result);
    }
4302
4303
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier and looks at the result.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // the warning of an unsupported modifier is silenced on purpose
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matched = @\preg_match('//u', '');

        return (bool) $matched;
    }
4315
4316
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is interpreted as decimal code point (digits only), as hexadecimal
     * code point (hex digits only) or otherwise as a character.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is not available
     *
     * @return string[] empty array if a boundary is falsy or resolves to code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // resolve the start boundary
        /** @noinspection PhpComposerExtensionStubsInspection */
        $startIsDecimal = \ctype_digit((string) $var1);
        if ($startIsDecimal) {
            $start = (int) $var1;
        } else {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($var1)) {
                $start = (int) self::hex_to_int($var1);
            } else {
                $start = self::ord($var1);
            }
        }

        if (!$start) {
            return [];
        }

        // resolve the end boundary
        /** @noinspection PhpComposerExtensionStubsInspection */
        $endIsDecimal = \ctype_digit((string) $var2);
        if ($endIsDecimal) {
            $end = (int) $var2;
        } else {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($var2)) {
                $end = (int) self::hex_to_int($var2);
            } else {
                $end = self::ord($var2);
            }
        }

        if (!$end) {
            return [];
        }

        $chars = [];
        foreach (\range($start, $end) as $codepoint) {
            $chars[] = self::chr($codepoint);
        }

        return $chars;
    }
4372
4373
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // non-standard "%uXXXX" escapes are rewritten into hex entities ("&#xXXXX;"),
        // so that the entity decoding below picks them up
        $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscape, $str)) {
            $str = (string) \preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // one pass always runs; further passes only while something still changes
        do {
            $str_compare = $str;

            $asUtf8 = self::to_utf8($str);
            $withoutEntities = self::html_entity_decode($asUtf8, $flags);
            $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
4420
4421
    /**
     * Filter a list of strings: drop too short and / or blank entries, re-index the rest.
     *
     * @param array $strings           <p>The strings to filter.</p>
     * @param bool  $removeEmptyValues <p>Drop entries which are empty after "trim()".</p>
     * @param int   $removeShortValues [optional] <p>Drop entries with a length less than or equal to this
     *                                 value; null disables the check.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = [];

        foreach ($strings as $candidate) {
            $tooShort = $removeShortValues !== null
                        &&
                        self::strlen($candidate) <= $removeShortValues;
            if ($tooShort) {
                continue;
            }

            $blank = $removeEmptyValues === true
                     &&
                     \trim($candidate) === '';
            if ($blank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
4455
4456
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped into the delimiter and always gets the "u" modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter the the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
    {
        // the option string "msr" is reduced to "ms" before it is used as PCRE modifiers
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $wrap = $delimiter ?: '/';

        $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4484
4485
    /**
     * Deprecated camelCase alias that forwards to "UTF8::remove_bom()".
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4500
4501
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked in turn against the start of the
     * (already shortened) string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = self::strlen_in_byte($str);

        foreach (self::$BOM as $bom => $bomBytes) {
            // only a BOM at the very beginning counts
            if (self::strpos_in_byte($str, $bom, 0) !== 0) {
                continue;
            }

            $stripped = self::substr_in_byte($str, $bomBytes, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= $bomBytes;
            $str = (string) $stripped;
        }

        return $str;
    }
4530
4531
    /**
     * Collapse directly repeated occurrences of a string into a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $needles = \is_string($what) === true ? [$what] : $what;

        // anything that is neither a string nor an array leaves the input untouched
        if (\is_array($needles) !== true) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($needles as $needle) {
            $run = '/(' . \preg_quote($needle, '/') . ')+/';
            $str = (string) \preg_replace($run, $needle, $str);
        }

        return $str;
    }
4554
4555
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which should
     *                              not be stripped. Default: '' (strip every tag)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $stripped = \strip_tags($str, $allowableTags);

        return $stripped;
    }
4569
4570
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * "\r\n" is handled as ONE break, so a non-empty $replacement is inserted once per
     * line ending. Only real "<br>" tags (any case, with or without attributes / trailing
     * slash) are matched; other tags which merely start with "br" are left alone.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" has to be the first alternative, otherwise "\r" and "\n" would be
        // replaced separately; "\b" stops "<br" from matching e.g. "<brand>"
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
4582
4583
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * Removed are all control characters except newline (dec 10), carriage return (dec 13)
     * and horizontal tab (dec 09). With $url_encoded the percent-encoded forms of the same
     * characters are removed too, in upper- and lower-case hex notation ("%0b" and "%0B").
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url-encoded form of the characters.</p>
     * @param string $replacement [optional] <p>The string to put in place of the removed characters.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        if ($url_encoded) {
            // hex digits of a percent-encoding are case-insensitive, hence the "i" modifier
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // repeat until stable: a removal can join the surrounding text into a new match
        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4616
4617
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (!self::str_starts_with($str, $substring)) {
            return $str;
        }

        // skip the prefix, keep everything after it
        $prefixLength = self::strlen($substring, $encoding);

        return (string) self::substr($str, $prefixLength, null, $encoding);
    }
4639
4640
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The lengths are measured in $encoding and the cut is done in the same encoding
     * (mirrors "UTF8::remove_left()").
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (!self::str_ends_with($str, $substring)) {
            return $str;
        }

        // the casts guard the subtraction against a non-integer result of self::strlen()
        $keepLength = (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding);

        // pass $encoding on: the length above is only valid for that encoding
        return (string) self::substr($str, 0, $keepLength, $encoding);
    }
4661
4662
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Delegates to "UTF8::str_replace()" or, for the case-insensitive variant,
     * to "UTF8::str_ireplace()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4680
4681
    /**
     * Replaces all occurrences of every $search element in $str by $replacement.
     *
     * Delegates to "UTF8::str_replace()" or, for the case-insensitive variant,
     * to "UTF8::str_ireplace()".
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4699
4700
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * Invalid sequences are handled by a UTF-8 to UTF-8 "mb_convert_encoding()" run with a
     * temporarily changed substitute character.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character.</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            // "none" is passed instead of an empty replacement
            $substitute = $replacementChar === '' ? 'none' : $replacementChar;

            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // always fallback via symfony polyfill
            $previousSubstitute = \mb_substitute_character();
            \mb_substitute_character($substitute);
            $converted = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            // restore the global setting for everybody else
            \mb_substitute_character($previousSubstitute);

            $str = \is_string($converted) ? $converted : '';
        }

        $search = [
            "\xEF\xBF\xBD",
            '�',
        ];
        $replace = [
            $replacementChar,
            $replacementChar,
        ];

        return \str_replace($search, $replace, $str);
    }
4756
4757
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * Without $chars (INF, null, false, empty string, ...) every trailing separator
     * (\pZ) and "other" (\pC) character is removed. The string '0' is a valid character
     * list: rtrim('100', '0') gives '1'.
     *
     * @param string $str   <p>The string to be trimmed.</p>
     * @param mixed  $chars <p>Optional characters to be stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // '0' is falsy in PHP but a perfectly valid character to strip
        $useDefault = $chars === \INF || (!$chars && $chars !== '0');

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefault) {
            $pattern = '[\pZ\pC]+$';
        } else {
            // quoted for use inside a character class with "/" as delimiter
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4781
4782
    /**
     * Build a regex fragment (character class or alternation) which matches the
     * characters of $s, seeded with the already prepared class content $class.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class [optional] <p>Initial content of the character class.</p>
     *
     * @return string a "[...]" class, or "(?:[...]|x|y)" when some parts cannot live inside the class
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASSS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASSS_CACHE[$cacheKey])) {
            return $RX_CLASSS_CACHE[$cacheKey];
        }

        // index 0 collects the character class, further entries become alternatives
        $parts = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" goes to the front of the class
                $parts[0] = '-' . $parts[0];
            } elseif (!isset($char[2])) {
                // at most two bytes
                $parts[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $parts[0] .= $char;
            } else {
                $parts[] = $char;
            }
        }

        if ($parts[0]) {
            $parts[0] = '[' . $parts[0] . ']';
        }

        $return = \count($parts) === 1
            ? $parts[0]
            : '(?:' . \implode('|', $parts) . ')';

        $RX_CLASSS_CACHE[$cacheKey] = $return;

        return $return;
    }
4830
4831
    /**
     * WARNING: Prints the collected support information (self::$SUPPORT) as HTML, e.g. for debugging.
     */
    public static function showSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // build the whole report first, then print it in one go
        $report = '<pre>';
        foreach (self::$SUPPORT as $feature => $state) {
            $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }
        $report .= '</pre>';

        echo $report;
    }
4846
4847
    /**
     * Converts a single UTF-8 character into its HTML numbered entity, e.g. "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII chars unchanged.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity (or an empty string for empty input)
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        if ($keepAsciiChars === true && self::is_ascii($char) === true) {
            return $char;
        }

        // the two most common encodings do not need to be normalized
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4876
4877
    /**
     * Replaces every run of $tabLength consecutive spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Values below 1 leave the
     *                          string untouched.</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // A non-positive length has no meaningful "run of spaces"; a negative value
        // would additionally make str_repeat() fail, so return the input as it is.
        if ($str === '' || $tabLength <= 0) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
    }
4887
4888
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is given, every element is split on its own (with the same options)
     * and the array of results is returned.
     *
     * @param int|int[]|string|string[] $str       <p>The string to split into array.</p>
     * @param int                       $length    [optional] <p>Max character length of each array element.</p>
     * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string[] an array containing chunks of the string
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as $k => $v) {
                // forward all options, so that nested values are handled like top-level values
                $str[$k] = self::split($v, $length, $cleanUtf8);
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        // init
        $ret = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['pcre_utf8'] === true) {
            // "u" => UTF-8 mode, "s" => the dot also matches line breaks
            \preg_match_all('/./us', $str, $retArray);
            if (isset($retArray[0])) {
                $ret = $retArray[0];
            }
            unset($retArray);
        } else {
            // fallback: decode the UTF-8 sequences byte by byte,
            // bytes that do not form a complete sequence are skipped

            $len = self::strlen_in_byte($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; $i++) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1 byte: 0xxxxxxx
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        $i++;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // group the single characters into chunks of $length characters
            return \array_map(
                static function ($item) {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
5010
5011
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // shared upper-case helper, so that both callbacks use the very same options
        $toUpper = static function ($value) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength) {
            return self::strtoupper($value, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        };

        $camelized = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // remove leading dashes and underscores
        $camelized = (string) \preg_replace('/^[-_]+/', '', $camelized);

        // remove the separators and upper-case the char that follows them (if any)
        $camelized = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            static function ($match) use ($toUpper) {
                return isset($match[1]) ? $toUpper($match[1]) : '';
            },
            $camelized
        );

        // upper-case the char that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            static function ($match) use ($toUpper) {
                return $toUpper($match[0]);
            },
            $camelized
        );
    }
5051
5052
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * Whitespace is collapsed first, then the words are processed once split
     * by spaces and once split by dashes.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);

        return self::str_capitalize_name_helper(
            self::str_capitalize_name_helper($collapsed, ' '),
            '-'
        );
    }
5068
5069
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names by $delimiter and upper-cases the first character of every part,
     * unless the part is a known lowercase name particle or starts with a known prefix.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts, e.g. " " or "-".</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // init
        $namesArray = \explode($delimiter, $names);

        if ($namesArray === false) {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        foreach ($namesArray as &$name) {
            // exact match with a lowercase particle: keep as it is
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $continue = false;

            // for dash-separated parts, a part that merely starts with a particle is also kept
            if ($delimiter === '-') {
                foreach ($specialCases['names'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        // one match is enough
                        break;
                    }
                }
            }

            if ($continue === false) {
                foreach ($specialCases['prefixes'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        // one match is enough
                        break;
                    }
                }
            }

            if ($continue) {
                continue;
            }

            $name = self::str_upper_first($name);
        }
        // break the reference, so that "$name" can not modify the last element by accident
        unset($name);

        return \implode($delimiter, $namesArray);
    }
5157
5158
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false. An empty $haystack or $needle always yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        // a non-bool third argument is used as the encoding
        if (\is_bool($caseSensitive) === false) {
            $encoding = (string) $caseSensitive;
        }

        $position = $caseSensitive
            ? self::strpos($haystack, $needle, 0, $encoding)
            : self::stripos($haystack, $needle, 0, $encoding);

        return $position !== false;
    }
5187
5188
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false. An empty $haystack or an empty list of
     * $needles always yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        // a non-bool third argument is used as the encoding
        if (\is_bool($caseSensitive) === false) {
            $encoding = (string) $caseSensitive;
        }

        foreach ($needles as $needle) {
            $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
            if ($found === false) {
                // one missing needle is enough
                return false;
            }
        }

        return true;
    }
5223
5224
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false. An empty list of $needles always yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool
     *               Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
            if ($found === true) {
                // the first hit is enough
                return true;
            }
        }

        return false;
    }
5251
5252
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "str_delimit()" with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5266
5267
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string. It is
     *                                                   inserted literally, "$1" or "\1" have no special meaning.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8                     [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $tryToKeepStringLength         [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = self::trim($str);

        // Mark every uppercase letter that is not at a word boundary with a dash;
        // "\p{Lu}" covers non-ASCII uppercase letters (e.g. "Ä") as well.
        $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $str);

        $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // The delimiter is inserted after lower-casing, so it keeps its own case.
        // A callback is used so that the delimiter is never parsed as a replacement
        // pattern (back-references like "$1" or "\1").
        return (string) \preg_replace_callback(
            '/[-_\s]+/u',
            static function () use ($delimiter) {
                return $delimiter;
            },
            $str
        );
    }
5300
5301
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
     * "mb_detect_encoding()" (if mbstring is available) and finally "iconv()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $input = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true) === true) {
            $utf16Type = self::is_utf16($input, false);
            if ($utf16Type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16Type === 2) {
                return 'UTF-16BE';
            }

            $utf32Type = self::is_utf32($input, false);
            if ($utf32Type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32Type === 2) {
                return 'UTF-32BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding(
                $input,
                [
                    'ISO-8859-1',
                    'ISO-8859-2',
                    'ISO-8859-3',
                    'ISO-8859-4',
                    'ISO-8859-5',
                    'ISO-8859-6',
                    'ISO-8859-7',
                    'ISO-8859-8',
                    'ISO-8859-9',
                    'ISO-8859-10',
                    'ISO-8859-13',
                    'ISO-8859-14',
                    'ISO-8859-15',
                    'ISO-8859-16',
                    'WINDOWS-1251',
                    'WINDOWS-1252',
                    'WINDOWS-1254',
                    'CP932',
                    'CP936',
                    'CP950',
                    'CP866',
                    'CP850',
                    'CP51932',
                    'CP50220',
                    'CP50221',
                    'CP50222',
                    'ISO-2022-JP',
                    'ISO-2022-KR',
                    'JIS',
                    'JIS-ms',
                    'EUC-CN',
                    'EUC-JP',
                ],
                true
            );
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // a round trip that does not change the string means the encoding fits
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($roundTrip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5426
5427
    /**
     * Check if the string ends with the given substring (byte-wise, case-sensitive).
     *
     * An empty $haystack or an empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $needleLength = \strlen($needle);

        // a needle that is longer than the haystack can never be its ending
        if ($needleLength > \strlen($haystack)) {
            return false;
        }

        $ending = \substr($haystack, -$needleLength);

        return $ending === $needle;
    }
5443
5444
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            $matches = self::str_ends_with($str, $candidate);
            if ($matches) {
                // the first hit is enough
                return true;
            }
        }

        return false;
    }
5468
5469
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // already in place: nothing to do
        if (self::str_starts_with($str, $substring)) {
            return $str;
        }

        return $substring . $str;
    }
5486
5487
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // already in place: nothing to do
        if (self::str_ends_with($str, $substring)) {
            return $str;
        }

        return $str . $substring;
    }
5503
5504
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // search => replacement ("_id" has to be handled before the single "_")
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $humanized = self::str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        $humanized = self::trim($humanized);

        return self::ucfirst($humanized);
    }
5528
5529
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * An empty $haystack or an empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // the ending of the haystack, with the same byte-length as the needle
        $ending = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($ending, $needle) === 0;
    }
5549
5550
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            $matches = self::str_iends_with($str, $candidate);
            if ($matches) {
                // the first hit is enough
                return true;
            }
        }

        return false;
    }
5574
5575
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * This is a wrapper for "stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
5597
5598
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * This is a wrapper for "strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
5621
5622
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * This is a wrapper for "strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
5644
5645
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * This is a wrapper for "strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        $index = self::strrpos($str, $needle, $offset, $encoding);

        return $index;
    }
5668
5669
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The input is returned unchanged if $index is greater than the length of the
     * string, or if the length can not be determined.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
    {
        $len = self::strlen($str, $encoding);

        // "strlen()" may return false; never pass that on as a length
        if ($len === false || $index > $len) {
            return $str;
        }

        // "substr()" may return false; cast, so that only strings are concatenated
        $start = (string) self::substr($str, 0, $index, $encoding);
        $end = (string) self::substr($str, $index, $len, $encoding);

        return $start . $substring . $end;
    }
5692
5693
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search value is turned into a quoted, case-insensitive unicode
     * regular expression and applied via <function>preg_replace</function>.
     * An empty search value never matches anything.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s), passed on to preg_replace().
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Build the pattern list in a fresh array instead of mutating $search
        // through a by-reference loop variable (which would stay bound afterwards).
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // "^(?<=.)$" demands a character before the start of the subject,
            // so it can never match: an empty needle replaces nothing.
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // Dedicated out-variable for the match counter, so $replace is not clobbered.
        $replaced = 0;
        $subject = \preg_replace($patterns, $replace, $subject, -1, $replaced);
        $count = $replaced;

        return $subject;
    }
5737
5738
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is case-insensitive and byte-wise (native strncasecmp()).
     * An empty $search appends $replacement to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // Compare only the leading bytes instead of scanning the whole haystack
        // for an occurrence and then checking whether it was found at index 0.
        $searchLength = \strlen($search);
        if (\strncasecmp($str, $search, $searchLength) === 0) {
            return $replacement . \substr($str, $searchLength);
        }

        return $str;
    }
5769
5770
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is case-insensitive and byte-wise (native substr_compare()).
     * An empty $search appends $replacement to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // A needle longer than the haystack can never be its suffix. Checking the
        // lengths first also keeps the negative offset inside the string, which
        // would otherwise raise a warning (PHP 7) or a ValueError (PHP 8).
        $searchLength = \strlen($search);
        if (
            $searchLength <= \strlen($str)
            &&
            \substr_compare($str, $search, -$searchLength, $searchLength, true) === 0
        ) {
            $str = \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
5801
5802
    /**
     * Tells whether $haystack begins with $needle, ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // An empty haystack or an empty needle never counts as a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        return self::stripos($haystack, $needle) === 0;
    }
5822
5823
    /**
     * Tells whether the string begins with at least one of $substrings.
     *
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // Nothing to test, or nothing to test against.
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5851
5852
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * The separator is located case-insensitively. An empty string is returned
     * when either argument is empty or the separator does not occur.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the caller's encoding so the offset is counted in the same
        // units as the strlen()/substr() calls below.
        // NOTE(review): assumes str_iindex_first() takes ($str, $needle, $offset, $encoding)
        // like str_index_first() does - confirm.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5883
5884
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * The separator is located case-insensitively. An empty string is returned
     * when either argument is empty or the separator does not occur.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the caller's encoding so the offset is counted in the same
        // units as the strlen()/substr() calls below.
        // NOTE(review): assumes str_iindex_last() takes ($str, $needle, $offset, $encoding)
        // like str_index_last() does - confirm.
        $offset = self::str_iindex_last($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5915
5916
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * The separator is located case-insensitively. An empty string is returned
     * when either argument is empty or the separator does not occur.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the caller's encoding so the offset is counted in the same
        // units as the substr() call below.
        // NOTE(review): assumes str_iindex_first() takes ($str, $needle, $offset, $encoding)
        // like str_index_first() does - confirm.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5942
5943
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * The separator is located case-insensitively. An empty string is returned
     * when either argument is empty or the separator does not occur.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the caller's encoding so the offset is counted in the same
        // units as the substr() call below.
        // NOTE(review): assumes str_iindex_last() takes ($str, $needle, $offset, $encoding)
        // like str_index_last() does - confirm.
        $offset = self::str_iindex_last($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5969
5970
    /**
     * Gets the part of the string located via the first occurrence of "$needle",
     * as produced by this class's stristr(); "$beforeNeedle" is passed through to it.
     *
     * An empty string is returned when either argument is empty or when
     * stristr() reports no match.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6002
6003
    /**
     * Gets the part of the string located via the last occurrence of "$needle",
     * as produced by this class's strrichr(); "$beforeNeedle" is passed through to it.
     *
     * An empty string is returned when either argument is empty or when
     * strrichr() reports no match.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6030
6031
    /**
     * Returns the trailing $n characters of the string.
     *
     * A non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n < 1) {
            return '';
        }

        // A negative start offset makes substr() count from the end of the string.
        $tail = self::substr($str, -$n, null, $encoding);
        if ($tail === false) {
            return '';
        }

        return $tail;
    }
6050
6051
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned untouched; longer
     * strings are cut so that the kept text plus $strAddOn is $length characters
     * long. When $strAddOn alone is longer than $length, only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if (self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Room left for the original text once the add-on is appended. The add-on
        // is measured in the same encoding as $str, and the result is clamped at
        // zero: a negative length would presumably (as with native substr()) trim
        // from the end of the string instead of limiting it.
        $keep = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        return (string) self::substr($str, 0, $keep, $encoding) . $strAddOn;
    }
6077
6078
    /**
     * Limit the number of characters in a string, cutting at a word boundary
     * (a space character) where one is available.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if (self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: drop that space and append the add-on.
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
        }

        $truncated = (string) self::substr($str, 0, $length, $encoding);

        // Everything in front of the last space, i.e. without the trailing
        // (possibly cut-off) word; empty when there is no space at all.
        $lastSpace = \strrpos($truncated, ' ');
        $withoutLastWord = $lastSpace === false ? '' : \substr($truncated, 0, $lastSpace);

        if ($withoutLastWord === '') {
            // No earlier word boundary available: cut hard, one character short of the limit.
            return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
        }

        return $withoutLastWord . $strAddOn;
    }
6119
6120
    /**
     * Returns the longest prefix that the string and $otherStr have in common.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // The common prefix cannot be longer than the shorter of the two strings.
        $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        $prefix = '';
        $pos = 0;
        while ($pos < $limit) {
            $char = self::substr($str, $pos, 1, $encoding);

            // Stop at the first position where the two strings differ.
            if ($char !== self::substr($otherStr, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$pos;
        }

        return $prefix;
    }
6146
6147
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with its $str being the longest common substring
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem
        $strLength = self::strlen($str, $encoding);
        $otherLength = self::strlen($otherStr, $encoding);

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Extract every character of $otherStr once, instead of once per table cell.
        $otherChars = [];
        for ($j = 1; $j <= $otherLength; $j++) {
            $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
        }

        $len = 0; // length of the longest match found so far
        $end = 0; // 1-based index in $str at which that match ends

        // $table[$i][$j] = length of the common run ending at char $i of $str and
        // char $j of $otherStr; row/column 0 are the zero-filled borders.
        $table = \array_fill(
            0,
            $strLength + 1,
            \array_fill(0, $otherLength + 1, 0)
        );

        for ($i = 1; $i <= $strLength; $i++) {
            // Does not depend on $j, so it is fetched once per row.
            $strChar = self::substr($str, $i - 1, 1, $encoding);

            for ($j = 1; $j <= $otherLength; $j++) {
                // Non-matching cells simply keep their initial 0.
                if ($strChar !== $otherChars[$j]) {
                    continue;
                }

                $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

                // Strictly greater, so the first of several equally long matches wins.
                if ($table[$i][$j] > $len) {
                    $len = $table[$i][$j];
                    $end = $i;
                }
            }
        }

        $returnTmp = self::substr($str, $end - $len, $len, $encoding);

        return $returnTmp === false ? '' : $returnTmp;
    }
6198
6199
    /**
     * Returns the longest suffix that the string and $otherStr have in common.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // The common suffix cannot be longer than the shorter of the two strings.
        $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        $suffix = '';
        $fromEnd = 1;
        while ($fromEnd <= $limit) {
            // Negative offsets address characters counted from the end.
            $char = self::substr($str, -$fromEnd, 1, $encoding);

            // Stop at the first position (from the end) where the strings differ.
            if ($char !== self::substr($otherStr, -$fromEnd, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$fromEnd;
        }

        return $suffix;
    }
6225
6226
    /**
     * Tells whether $str matches the supplied regular expression.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // preg_match() yields 1 on a match and 0 or false otherwise.
        return (bool) \preg_match('/' . $pattern . '/u', $str);
    }
6242
6243
    /**
     * Tells whether a character exists at the given index. Negative offsets
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = self::strlen($str, $encoding);

        // Non-negative offsets must be smaller than the length; negative ones may
        // reach back at most "length" characters.
        return $offset < 0
            ? $length >= \abs($offset)
            : $length > $offset;
    }
6265
6266
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the requested encoding, so the bounds check uses
        // the same units as the char_at() lookup below.
        $length = self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6295
6296
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * An empty $str is returned as an empty string (it is not padded). The
     * input is also returned unchanged when $pad_length is not larger than the
     * current length, or when $pad_string is empty.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $pad_length <p>The length of return string.</p>
     * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int    $pad_type   [optional] <p>
     *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
     *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>;
     *                           the strings 'left', 'right' and 'both' are accepted as well
     *                           </p>
     * @param string $encoding   [optional] <p>Default: UTF-8</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @return string returns the padded string
     */
    public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = \STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        $str_length = self::strlen($str, $encoding);

        if (
            $pad_length > 0
            &&
            $pad_length >= $str_length
        ) {
            $ps_length = self::strlen($pad_string, $encoding);

            // Number of characters that still have to be added.
            $diff = ($pad_length - $str_length);

            // Nothing to add, or nothing to add it with. The second case would
            // otherwise divide by zero in the repeat-count calculations below.
            if ($diff === 0 || !$ps_length) {
                return $str;
            }

            // Each branch repeats the pad string often enough to cover the gap and
            // then trims the surplus with the encoding-aware substr().
            switch ($pad_type) {
                case \STR_PAD_LEFT:
                    $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                    $pre = (string) self::substr($pre, 0, $diff, $encoding);
                    $post = '';

                    break;

                case \STR_PAD_BOTH:
                    // With an odd gap the right side receives the extra character.
                    $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                    $pre = (string) self::substr($pre, 0, (int) \floor($diff / 2), $encoding);
                    $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                    $post = (string) self::substr($post, 0, (int) \ceil($diff / 2), $encoding);

                    break;

                case \STR_PAD_RIGHT:
                default:
                    $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                    $post = (string) self::substr($post, 0, $diff, $encoding);
                    $pre = '';
            }

            return $pre . $str . $post;
        }

        return $str;
    }
6369
6370
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for pad() with a $padType of 'both'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Total number of characters still missing to reach $length.
        $missing = $length - self::strlen($str, $encoding);

        // With an odd amount the right side receives the extra character.
        $left = (int) \floor($missing / 2);
        $right = (int) \ceil($missing / 2);

        return self::apply_padding($str, $left, $right, $padStr, $encoding);
    }
6387
6388
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for pad() with a $padType of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure $str in the requested encoding (as str_pad_both() does), so the
        // amount of padding is counted in the same units that are applied.
        $missing = $length - (int) self::strlen($str, $encoding);

        return self::apply_padding($str, $missing, 0, $padStr, $encoding);
    }
6403
6404
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for pad() with a $padType of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure $str in the requested encoding (as str_pad_both() does), so the
        // amount of padding is counted in the same units that are applied.
        $missing = $length - (int) self::strlen($str, $encoding);

        return self::apply_padding($str, 0, $missing, $padStr, $encoding);
    }
6419
6420
    /**
     * Repeat a string.
     *
     * The input is passed through this class's filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6444
6445
    /**
     * Replace all occurrences of the search string with the replacement string.
     *
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for (the needle).
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value for found search values.
     *                       An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on (the haystack).
     *                       If it is an array, the replacement is performed on every
     *                       entry and an array is returned.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed a string or an array with the replaced values
     */
    public static function str_replace($search, $replace, $subject, int &$count = null)
    {
        // $count is forwarded by reference so the native function can fill it in.
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6478
6479
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. If $str does not start
     * with $search, $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The prefix to look for. An empty value appends $replacement to $str.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // An empty search term never counts as a prefix; the replacement is appended instead
        // (this also covers the empty-input cases: '' . $replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        $searchBytes = \strlen($search);

        // Only the leading bytes are compared; a non-matching (or too short) input is returned as-is.
        if (\strncmp($str, $search, $searchBytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $searchBytes);
    }
6510
6511
    /**
     * Replaces $search at the end of the string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. If $str does not end
     * with $search (including when $search is longer than $str), $str is
     * returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The suffix to look for. An empty value appends $replacement to $str.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // An empty search term never counts as a suffix; the replacement is simply appended
        // (this also covers the empty-input cases: '' . $replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        $searchBytes = \strlen($search);

        // Compare only the trailing bytes. Unlike strpos() with a computed offset, substr()
        // tolerates a $search that is longer than $str (or an empty $str) without raising
        // an out-of-range offset warning / ValueError; the strict comparison then simply fails.
        if (\substr($str, -$searchBytes) !== $search) {
            return $str;
        }

        return \substr($str, 0, -$searchBytes) . $replacement;
    }
6542
6543
    /**
     * Replace the first occurrence of the "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string the subject with the first match replaced, or the unchanged subject if nothing matched
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $position = self::strpos($subject, $search);

        // No occurrence: nothing to replace.
        if ($position === false) {
            return $subject;
        }

        // Swap exactly the matched stretch (position + length of the search term).
        return self::substr_replace($subject, $replace, $position, self::strlen($search));
    }
6561
6562
    /**
     * Replace the last occurrence of the "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string the subject with the last match replaced, or the unchanged subject if nothing matched
     */
    public static function str_replace_last(string $search, string $replace, string $subject): string
    {
        $position = self::strrpos($subject, $search);

        // No occurrence: nothing to replace.
        if ($position === false) {
            return $subject;
        }

        // Swap exactly the matched stretch (position + length of the search term).
        return self::substr_replace($subject, $replace, $position, self::strlen($search));
    }
6580
6581
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses \shuffle(), a random algorithm which is weak for cryptography purposes
     *
     * @param string $str <p>The input string</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str): string
    {
        // One index per character position, brought into random order.
        $positions = \range(0, self::strlen($str) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        // Pick the single character at each shuffled position ...
        $chars = \array_map(
            static function ($position) use ($str) {
                return self::substr($str, $position, 1);
            },
            $positions
        );

        // ... and glue them back together.
        return \implode('', $chars);
    }
6603
6604
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Encoding used to measure and cut the string. Default: UTF-8</p>
     *
     * @return false|string
     *                      <p>The extracted substring, as produced by self::substr().</p>
     *                      <p>An empty string is returned when a non-negative $end is not greater than $start.</p>
     */
    public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
    {
        // A non-negative end index at or before the start yields an empty slice.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null) {
            // No end given: take (at most) the whole string length, measured in $encoding.
            $length = (int) self::strlen($str, $encoding);
        } elseif ($end < 0) {
            // Negative end: counted back from the end of the string, measured in $encoding.
            $length = (int) self::strlen($str, $encoding) + $end - $start;
        } else {
            $length = $end - $start;
        }

        return self::substr($str, $start, $length, $encoding);
    }
6633
6634
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized, "-" becomes "_", every digit is wrapped
     * in underscores, every ASCII upper-case letter is lower-cased and prefixed
     * with an underscore, whitespace runs become "_", repeated underscores are
     * collapsed and leading / trailing underscores and whitespace are trimmed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        $str = self::normalize_whitespace($str);
        $str = \str_replace('-', '_', $str);

        $str = (string) \preg_replace_callback(
            // NOTE: no "|" inside the character class - there it would be a literal pipe,
            // not an alternation, and pipes in the input would get an underscore prefix.
            '/([\dA-Z])/u',
            function ($matches) use ($encoding) {
                $match = $matches[1];
                $matchInt = (int) $match;

                // ASCII digit: separate it from its neighbours on both sides.
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                // Upper-case letter: start a new word.
                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // The patterns are applied in order; once all whitespace has been turned into "_"
        // there is nothing left for a separate whitespace trim to match.
        $str = (string) \preg_replace(
            [
                '/\s+/', // convert spaces to "_"
                '/_+/',  // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        $str = self::trim($str, '_'); // trim leading & trailing "_"

        return self::trim($str); // trim leading & trailing whitespace
    }
6680
6681
    /**
     * Sort all characters according to code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // Flipping twice drops duplicate values (a value can only be a key once).
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if ($desc === false) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        // Turn the ordered code points back into a string.
        return self::string($codePoints);
    }
6706
6707
    /**
     * Alias for "UTF8::split()": all arguments are forwarded unchanged.
     *
     * @see UTF8::split()
     *
     * @param string|string[] $str
     * @param int             $len
     *
     * @return string[]
     */
    public static function str_split($str, int $len = 1): array
    {
        $chunks = self::split($str, $len);

        return $chunks;
    }
6721
6722
    /**
     * Splits the string at every occurrence of $pattern and returns the parts
     * as an array of strings. An optional integer $limit truncates the results.
     *
     * NOTE: $pattern is escaped with \preg_quote() before it is used, so it is
     * matched literally and regex meta characters have no special meaning.
     *
     * @param string $str
     * @param string $pattern <p>The delimiter at which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        // An empty delimiter is never handed to preg_split(): the input comes back as the only part.
        if ($pattern === '') {
            return [$str];
        }

        // preg_split() puts the remaining unsplit string into the last slot when a limit is given,
        // so one extra slot is requested and dropped again below.
        $splitLimit = $limit > 0 ? $limit + 1 : -1;

        $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);
        if ($parts === false) {
            return [];
        }

        if ($splitLimit > 0 && \count($parts) === $splitLimit) {
            \array_pop($parts);
        }

        return $parts;
    }
6765
6766
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty haystack or an
     * empty needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Only the leading bytes of the haystack are compared against the needle.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
6786
6787
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty list of substrings yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the substrings
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Stop at the first candidate that is a prefix of $str.
        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate) === true) {
                return true;
            }
        }

        return false;
    }
6815
6816
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_index_first($str, $separator);
        if ($position === false) {
            return '';
        }

        // Continue right behind the separator and take everything up to the end.
        $from = $position + self::strlen($separator, $encoding);

        return (string) self::substr($str, $from, null, $encoding);
    }
6847
6848
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_index_last($str, $separator);
        if ($position === false) {
            return '';
        }

        // Continue right behind the separator and take everything up to the end.
        $from = $position + self::strlen($separator, $encoding);

        return (string) self::substr($str, $from, null, $encoding);
    }
6879
6880
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_index_first($str, $separator);
        if ($position === false) {
            return '';
        }

        // Everything from the start up to (not including) the separator.
        return (string) self::substr($str, 0, $position, $encoding);
    }
6911
6912
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_index_last($str, $separator);
        if ($position === false) {
            return '';
        }

        // Everything from the start up to (not including) the last separator.
        return (string) self::substr($str, 0, $position, $encoding);
    }
6943
6944
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The lookup is delegated to self::strstr(); an empty string is returned
     * if $str or $needle is empty or if the lookup reports no match.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

        // "false" means the needle was not found.
        return $found === false ? '' : $found;
    }
6976
6977
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The lookup is delegated to self::strrchr(); an empty string is returned
     * if $str or $needle is empty or if the lookup reports no match.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

        // "false" means the needle was not found.
        return $found === false ? '' : $found;
    }
7004
7005
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return $substring . $str . $substring;
    }
7017
7018
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of the word lower-cased; the input is trimmed first unless
     * $useTrimFirst is disabled. Words listed in $ignore are left untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($useTrimFirst === true) {
            $str = self::trim($str);
        }

        $words = self::str_to_words($str);

        foreach ($words as $index => $word) {
            // Exact (case-sensitive) matches from the ignore list stay as they are.
            if ($ignore && \in_array($word, $ignore, true)) {
                continue;
            }

            // Lower-case the whole word first, then upper-case only its first letter.
            $lowered = self::strtolower($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

            $words[$index] = self::str_upper_first($lowered, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // The pieces are glued back together without adding any separator.
        return \implode('', $words);
    }
7073
7074
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are merged into the built-in list of small
     * words and joined into a regular expression without escaping.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // Built-in small words (regex fragments), extended by the caller's list.
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = self::trim($str);

        // Input without any lower-case character (e.g. ALL CAPS) is lower-cased first,
        // using the caller's $encoding like every other case conversion in this method.
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // The main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            function ($matches) use ($encoding) {
                // Preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // Preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // Lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // Capitalize word w/o internal caps
                    $str .= static::str_upper_first($matches[4], $encoding);
                } else {
                    // Preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            function ($matches) use ($encoding) {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            function ($matches) use ($encoding) {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            function ($matches) use ($encoding) {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for the ellipsis character "…" although its
        // inline comment talks about a hyphen - kept verbatim, confirm whether this is intended.
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            function ($matches) use ($encoding) {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7218
7219
    /**
     * Get a binary representation of a specific string.
     *
     * Every byte of the input is rendered as eight bits and the bytes are
     * concatenated in order. Leading zero bits of the complete result are
     * dropped, so short inputs keep the exact output the previous
     * base_convert()-based implementation produced (e.g. "a" => "1100001").
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>A string of "0" and "1" chars, "0" for an empty or all-zero input.</p>
     */
    public static function str_to_binary(string $str): string
    {
        if ($str === '') {
            return '0';
        }

        // Work on the raw bytes instead of base_convert(): base_convert() uses a
        // float internally and silently loses precision for larger numbers,
        // which corrupted the result for inputs longer than a few bytes.
        $bytes = \unpack('C*', $str);
        if ($bytes === false) {
            return '0';
        }

        $bits = '';
        foreach ($bytes as $byte) {
            $bits .= \sprintf('%08b', $byte);
        }

        // keep the historical output format: no leading zero bits
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7232
7233
    /**
     * Split a string into its lines.
     *
     * A run of one or two "\r" / "\n" chars is treated as a single line separator.
     *
     * @param string   $str               <p>The input string.</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "nothing to split" (empty input or a failing split)
        $nothingFound = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothingFound;
        }

        $lines = \preg_split("/[\r\n]{1,2}/u", $str);
        if ($lines === false) {
            return $nothingFound;
        }

        $needsFiltering = $removeShortValues !== null || $removeEmptyValues !== false;
        if (!$needsFiltering) {
            return $lines;
        }

        return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
    }
7266
7267
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so without filtering the
     * result alternates between non-word parts (even indexes) and words (odd indexes).
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        $nothingFound = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothingFound;
        }

        $charList = self::rxClass($charList, '\pL');

        $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $nothingFound;
        }

        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $parts;
        }

        $filtered = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

        // make sure every remaining value is a string (keys are kept as they are)
        return \array_map(
            static function ($value): string {
                return (string) $value;
            },
            $filtered
        );
    }
7311
7312
    /**
     * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7327
7328
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If $substring is longer than a non-negative $length it can not fit: the
     * string is then hard-truncated to $length and nothing is appended.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($length >= self::strlen($str, $encoding)) {
            return $str;
        }

        $substringLength = (int) self::strlen($substring, $encoding);

        // The substring does not fit: subtracting its length would produce a
        // negative length, which substr() interprets as "cut from the end" and
        // the result would be longer than requested.
        if ($length >= 0 && $substringLength > $length) {
            return (string) self::substr($str, 0, $length, $encoding);
        }

        // Need to further trim the string so we can append the substring
        $truncated = self::substr($str, 0, $length - $substringLength, $encoding);

        // substr() can return false, treat that as an empty string
        return (string) $truncated . $substring;
    }
7361
7362
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        if ($length >= self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve the room that is needed for the appended substring
        $length -= self::strlen($substring, $encoding);

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $nextSpace = self::strpos($str, ' ', $length - 1, $encoding);
        if ($nextSpace === $length) {
            return $head . $substring;
        }

        // no space at all (neither in the head nor behind it): keep the hard cut
        $lastSpace = self::strrpos($head, ' ', 0, $encoding);
        if ($lastSpace === false && $nextSpace === false) {
            return $head . $substring;
        }

        // the last word was split: go back to the last space inside of the head
        $head = self::substr($head, 0, (int) $lastSpace, $encoding);

        return $head . $substring;
    }
7403
7404
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: this is "UTF8::str_delimit()" with "_" as delimiter
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
7418
7419
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: the string is camelized via "UTF8::str_camelize()" first, afterwards
     * the first char is upper-cased via "UTF8::str_upper_first()"
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $camelized = self::str_camelize($str, $encoding);

        return self::str_upper_first($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }
7436
7437
    /**
     * Alias of "UTF8::ucfirst()": all arguments are passed through unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $upperFirst;
    }
7454
7455
    /**
     * Counts number of words in the UTF-8 string.
     *
     * The work is done by "UTF8::str_to_words()": its result alternates between
     * non-word parts (even indexes) and words (odd indexes).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // every format except 1 and 2 => only count the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format 2: the key is the char-offset of the word within the string
        $position = self::strlen($parts[0]);
        for ($index = 1; $index < $partCount; $index += 2) {
            $words[$position] = $parts[$index];
            $position += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
        }

        return $words;
    }
7492
7493
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
     * case-folded via UTF8::strtocasefold() before they are compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
7514
7515
    /**
     * Alias of "UTF8::strstr()": all arguments are passed through unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $found;
    }
7532
7533
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings are reported as equal right away, everything else is
     * compared byte-wise after a NFD normalization. A string that can not be
     * normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *              <strong>&lt; 0</strong> if str1 is less than str2<br>
     *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *              <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @noinspection PhpUndefinedClassInspection */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @noinspection PhpUndefinedClassInspection */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false on failure; passing that to
        // \strcmp() is a TypeError with strict_types, so fall back to the input.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
7552
7553
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string   $str
     * @param string   $charList <p>The chars that end the initial segment.</p>
     * @param int      $offset   <p>Start of the part of $str that is examined.</p>
     * @param int|null $length   <p>Length of the part of $str that is examined.</p>
     *
     * @return int|null
     *                  <strong>null</strong> if $charList or the examined part of $str is empty
     */
    public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
    {
        if ($charList === '') {
            return null;
        }

        // only examine the requested part of the string
        if ($offset || $length !== null) {
            $part = self::substr($str, $offset, $length);
            if ($part === false) {
                return null;
            }

            $str = $part;
        }

        if ($str === '') {
            return null;
        }

        // lazily capture everything in front of the first char of the mask
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        if (\preg_match($pattern, $str, $found)) {
            return self::strlen($found[1]);
        }

        // no char of the mask was found: the whole string is the segment
        return self::strlen($str);
    }
7587
7588
    /**
     * Alias of "UTF8::stristr()": all arguments are passed through unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $found;
    }
7605
7606
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        // convert every code point via "UTF8::chr()" ...
        $chars = \array_map([self::class, 'chr'], $array);

        // ... and glue the chars together
        return \implode('', $chars);
    }
7628
7629
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is checked against every key of the internal BOM list.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // only the BOM strings (keys) are needed here, not the byte-lengths
        foreach (\array_keys(self::$BOM) as $bom) {
            $bomPosition = \strpos($str, $bom);
            if ($bomPosition === 0) {
                return true;
            }
        }

        return false;
    }
7648
7649
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str             <p>
     *                                The input string.
     *                                </p>
     * @param string $allowable_tags  [optional] <p>
     *                                You can use the optional second parameter to specify tags which should
     *                                not be stripped.
     *                                </p>
     *                                <p>
     *                                HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                can not be changed with allowable_tags.
     *                                </p>
     * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        // optionally clean the input before the tags are removed
        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        return \strip_tags($input, $allowable_tags);
    }
7681
7682
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str
     *
     * @return string <p>The string without whitespace, an empty string if the replacement fails.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        // preg_replace() returns null on error, the cast turns that into ''
        return (string) $stripped;
    }
7699
7700
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * The lookup tries, in this order: mbstring, intl (UTF-8 and a non-negative
     * offset only), the native function for pure ASCII input and finally a
     * case-folded UTF8::strpos(). The next strategy is used whenever the
     * previous one is not available or did not find the needle.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCommonEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // 1st try: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $position = \mb_stripos($haystack, $needle, $offset, $encoding);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // 2nd try: intl ("grapheme_stripos()" can't handle other encodings or a negative offset)
        //

        $intlIsUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
        if ($intlIsUsable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings and search case-sensitive
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
7773
7774
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * The search is case-insensitive. mbstring is used if it is available, otherwise
     * intl (UTF-8 only), the native function (pure ASCII input) or a regex is used.
     *
     * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle  [optional] <p>
     *                               If <b>TRUE</b>, it returns the part of the
     *                               haystack before the first occurrence of the needle (excluding the needle).
     *                               </p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can be empty after the cleaning. Compare against '' on
        // purpose: a loose "!$needle" check is also true for the needle "0",
        // which then returned the whole haystack instead of searching for it.
        if ($needle === '') {
            return $haystack;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        if (self::is_ascii($needle) && self::is_ascii($haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // fallback via regex: lazily capture everything in front of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // strlen() can return false, but substr() needs an int offset
        return self::substr($haystack, (int) self::strlen($match[1]));
    }
7852
7853
    /**
     * Get the string length, not the byte-length!
     *
     * The length is determined via the first strategy that is available and
     * succeeds: mbstring, iconv, intl (UTF-8 only), the native function (pure
     * ASCII input) and finally a regex that counts the chars.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *             The number <strong>(int)</strong> of characters in the string $str having character encoding
     *             $encoding.
     *             (One multi-byte character counted as +1).
     *             <br>
     *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strlen_in_byte($str);
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $isUtf8 = $encoding === 'UTF-8';

        if (
            !$isUtf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $charCount = \mb_strlen($str, $encoding);
            if ($charCount !== false) {
                return $charCount;
            }
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $charCount = \iconv_strlen($str, $encoding);
            if ($charCount !== false) {
                return $charCount;
            }
        }

        //
        // fallback via intl ("grapheme_strlen()" can't handle other encodings)
        //

        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $charCount = \grapheme_strlen($str);
            if ($charCount !== null) {
                return $charCount;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the chars that the regex can find
        //

        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        // nothing matched although the string has at least one byte => invalid chars
        $nothingMatched = $charCount === 0 && isset($str[0]);

        return $nothingMatched ? false : $charCount;
    }
7969
7970
    /**
     * Get the length of a string, counted in bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int
     *             The number of bytes, <strong>0</strong> for an empty string
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // the "mb_" functions are available whenever the overload is active,
        // so count via an 8-bit charset in that case
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
7994
7995
    /**
     * Compare two strings case-insensitively with a "natural order" algorithm.
     *
     * INFO: both strings are passed through UTF8::strtocasefold() and then
     *       handed over to UTF8::strnatcmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8016
8017
    /**
     * Compare two strings with a "natural order" algorithm.
     *
     * INFO: identical strings short-circuit to 0, everything else is passed
     *       through UTF8::strtonatfold() and compared via the native "strnatcmp()"
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $natFolded1 = self::strtonatfold($str1);
        $natFolded2 = self::strtonatfold($str2);

        return \strnatcmp($natFolded1, $natFolded2);
    }
8036
8037
    /**
     * Compare the first n characters of two strings, ignoring case.
     *
     * INFO: both strings are passed through UTF8::strtocasefold() and then
     *       handed over to UTF8::strncmp()
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8060
8061
    /**
     * Compare the first n characters of two strings.
     *
     * INFO: both strings are cut via UTF8::substr() and then compared via UTF8::strcmp()
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     * @param int    $len  <p>Number of characters to use in the comparison.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(string $str1, string $str2, int $len): int
    {
        // a failed "substr" is cast to an empty string
        return self::strcmp(
            (string) self::substr($str1, 0, $len),
            (string) self::substr($str2, 0, $len)
        );
    }
8082
8083
    /**
     * Search a string for any character out of a given set.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string
     *                      The rest of the haystack, starting at the first matched character,
     *                      or false if nothing matched or one of the inputs is empty
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $match)) {
            return false;
        }

        // cut the haystack at the byte position of the matched character
        $bytePosition = (int) \strpos($haystack, $match[0]);

        return \substr($haystack, $bytePosition);
    }
8105
8106
    /**
     * Find position of first occurrence of string in a string.
     *
     * INFO: the available backends are tried in this order:
     *       mbstring, intl, iconv, ascii-only "strpos()" and finally a vanilla php fallback;
     *       "CP850" / "ASCII" are handled byte-wise via UTF8::strpos_in_byte()
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strpos_in_byte($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // search only in the part of the haystack that starts at $offset,
        // the untouched $haystack is still needed to resolve a negative offset
        if ($haystackIsAscii) {
            $haystackTail = \substr($haystack, $offset);
        } else {
            $haystackTail = self::substr($haystack, $offset, null, $encoding);
        }
        if ($haystackTail === false) {
            $haystackTail = '';
        }
        $haystackTail = (string) $haystackTail;

        $bytePos = \strpos($haystackTail, $needle);
        if ($bytePos === false) {
            return false;
        }

        if ($offset < 0) {
            // a negative offset counts from the end of the haystack: translate it into the
            // absolute start position, so that the result is relative to the beginning of the
            // haystack (like the result of "mb_strpos()") and not to the beginning of the tail
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        if ($bytePos) {
            // convert the byte position inside the tail into a character position
            return $offset + (int) self::strlen(\substr($haystackTail, 0, $bytePos), $encoding);
        }

        return $offset;
    }
8256
8257
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or one of the inputs
     *                   is empty), it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // the "mb_" functions are available whenever the overload is active,
        // so search via an 8-bit charset in that case
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
8290
8291
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * INFO: "mb_strrchr()" is used when mbstring is available; the other fallbacks
     *       only look for the first character of the needle
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift
            // the positions reported by "mb_" / "iconv_" functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $isByteEncoding) {
            return \strrchr($haystack, $needle);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both remaining fallbacks search for the first character of the needle only
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, null, $encoding);
        }
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8404
8405
    /**
     * Reverses characters order in the string.
     *
     * INFO: the string is walked from its last character to its first one
     *       via UTF8::strlen() and UTF8::substr()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // "self::strlen()" can return false instead of a length; cast it, so that the
        // counter is always an integer (decrementing a bool has no effect and is
        // deprecated as of PHP 8.3) and only integers are passed on as "substr" offset
        $i = (int) self::strlen($str);

        $reversed = '';
        while ($i--) {
            $reversed .= self::substr($str, $i, 1);
        }

        return $reversed;
    }
8426
8427
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * INFO: "mb_strrichr()" is used when mbstring is available; the vanilla php
     *       fallback only looks for the first character of the needle
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle         <p>The string to find in haystack.</p>
     * @param bool   $before_needle  [optional] <p>
     *                               Determines which portion of haystack
     *                               this function returns.
     *                               If set to true, it returns all of haystack
     *                               from the beginning to the last occurrence of needle.
     *                               If set to false, it returns all of haystack
     *                               from the last occurrence of needle to the end,
     *                               </p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would shift
            // the positions reported by "mb_" / "iconv_" functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is searched for
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8497
8498
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * INFO: "CP850" / "ASCII" are handled byte-wise via UTF8::strripos_in_byte(),
     *       otherwise the backends are tried in this order:
     *       mbstring, intl, ascii-only and finally a vanilla php fallback
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // neither iconv nor mbstring accept an integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strripos()" and "iconv_strripos()" do not tolerate invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        // "grapheme_strripos()" can't handle other encodings or a negative offset
        $intlIsUsable = $encoding === 'UTF-8'
                        &&
                        $offset >= 0
                        &&
                        self::$SUPPORT['intl'] === true;
        if ($intlIsUsable) {
            $position = \grapheme_strripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, then do a case-sensitive search
        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
    }
8603
8604
    /**
     * Find the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or one of the
     *                   inputs is empty)
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // the "mb_" functions are available whenever the overload is active,
        // so search via an 8-bit charset in that case
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
8639
8640
    /**
     * Find position of last occurrence of a string in a string.
     *
     * INFO: "CP850" / "ASCII" are handled byte-wise via UTF8::strrpos_in_byte(),
     *       otherwise the backends are tried in this order:
     *       mbstring, intl, ascii-only and finally a vanilla php fallback
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string. <strong>null</strong> (the default) is passed on as
     *                              <strong>0</strong> to the byte-wise and the mbstring backend.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            // "strrpos_in_byte()" only accepts an integer offset, so the default (null)
            // must not be passed through: with strict types that is a TypeError
            // NOTE(review): assumes an offset of 0 equals "no offset" in strrpos_in_byte() - confirm
            return self::strrpos_in_byte($haystack, $needle, $offset ?? 0);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // "mb_strrpos()" declares an integer offset (default: 0),
            // so never hand over null in strict mode
            return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $offset !== null
            &&
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (
            $offset !== null
            &&
            self::is_ascii($haystack)
            &&
            self::is_ascii($needle)
        ) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // positive offset: skip the leading characters and add the offset back at the end,
        // negative offset: cut the trailing characters and search from the beginning
        $haystackTmp = null;
        if ($offset > 0) {
            $haystackTmp = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $haystackTmp = self::substr($haystack, 0, $offset);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        $pos = self::strrpos_in_byte($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // convert the byte position into a character position;
        // a null offset counts as 0 and a false length counts as 0
        return (int) $offset + (int) self::strlen(self::substr_in_byte($haystack, 0, $pos));
    }
8775
8776
    /**
     * Byte-wise lookup of the last occurrence of a string inside another string.
     *
     * The search is delegated to the native strrpos(); when mbstring function overloading
     * is active, mb_strrpos() with the 8-bit "CP850" charset is used instead.
     *
     * @param string $haystack <p>The string that is searched for the last occurrence of needle.</p>
     * @param string $needle   <p>The string to look for in haystack.</p>
     * @param int    $offset   [optional] <p>
     *                         Passed through unchanged as the offset argument of the underlying
     *                         strrpos() / mb_strrpos() call.
     *                         </p>
     *
     * @return false|int The position of the last occurrence of needle in haystack,
     *                   or false if either argument is empty or needle is not found
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / nothing to search for
        if ($haystack === '') {
            return false;
        }
        if ($needle === '') {
            return false;
        }

        // lazily detect the available extensions
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $isOverloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return $isOverloaded
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
8810
8811
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str    <p>The input string.</p>
     * @param string $mask   <p>The mask of chars</p>
     * @param int    $offset [optional] <p>Start of the sub-string (see UTF8::substr()) that is inspected.</p>
     * @param int    $length [optional] <p>Length of the sub-string (see UTF8::substr()) that is inspected.</p>
     *
     * @return int The length of the leading segment, or 0 if the (sub-)string or the mask is empty,
     *             nothing matches or the length cannot be determined
     */
    public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
    {
        if ($offset || $length !== null) {
            // UTF8::substr() may return false; treat that like an empty string
            $str = (string) self::substr($str, $offset, $length);
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // use a dedicated array for the matches instead of overwriting the subject string
        $matches = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
            // no match (0) or regex error (false)
            return 0;
        }

        // UTF8::strlen() may return false, the cast keeps the declared "int" return type intact
        return (int) self::strlen($matches[0]);
    }
8838
8839
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * The backends are tried in this order: byte-wise search for the "CP850" / "ASCII" encodings,
     * mbstring, intl (UTF-8 only), byte-wise search for pure ASCII input and finally a PCRE based search.
     *
     * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle  [optional] <p>
     *                               If <b>TRUE</b>, strstr() returns the part of the
     *                               haystack before the first occurrence of the needle (excluding the needle).
     *                               </p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                       A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // group 1 lazily captures everything in front of the first occurrence of the needle
        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // UTF8::strlen() can return false, which must not be passed on
        // as the integer offset of UTF8::substr()
        $needleOffset = self::strlen($match[1]);
        if ($needleOffset === false) {
            return false;
        }

        return self::substr($haystack, $needleOffset);
    }
8942
8943
    /**
     * Byte-wise lookup of the first occurrence of a string within another.
     *
     * The search is delegated to the native strstr(); when mbstring function overloading
     * is active, mb_strstr() with the 8-bit "CP850" charset is used instead.
     *
     * @param string $haystack      <p>The string that is searched for the first occurrence of needle.</p>
     * @param string $needle        <p>The string to look for in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the portion of haystack that is returned:
     *                              true returns everything in front of the first occurrence of needle,
     *                              false returns everything from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string The selected portion of haystack,
     *                      or false if either argument is empty or needle is not found
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in / nothing to search for
        if ($haystack === '') {
            return false;
        }
        if ($needle === '') {
            return false;
        }

        // lazily detect the available extensions
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
8982
8983
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first run through UTF8::fixStrCaseHelper() and then converted with
     * UTF8::strtolower() or UTF8::strtoupper(), depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // only a strict boolean "true" selects the lowercase variant
        return $lower === true
            ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
    }
9021
9022
    /**
     * Make a string lowercase.
     *
     * If a language is given and intl is available, the conversion is done via
     * transliterator_transliterate(); in every other case (including a failed
     * transliteration) mb_strtolower() is used.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all alphabetic characters converted to lowercase
     */
    public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the string is handed to the case-mapping functions
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Lower';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);
                if ($transliterated !== false) {
                    return $transliterated;
                }

                // transliteration failed: returning "false" would violate the declared
                // "string" return type, so continue with the generic fallback below
            } else {
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
9082
9083
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and every run of non-spacing marks (\p{Mn})
     * is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string The folded string; an empty string if the normalization or the replacement fails
     */
    private static function strtonatfold(string $str): string
    {
        // Normalizer::normalize() may return false: cast it so that preg_replace()
        // always receives a string
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = (string) \Normalizer::normalize($str, \Normalizer::NFD);

        // preg_replace() may return null on error: cast it to keep the declared "string" return type
        return (string) \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
9095
9096
    /**
     * Make a string uppercase.
     *
     * If a language is given and intl is available, the conversion is done via
     * transliterator_transliterate(); in every other case (including a failed
     * transliteration) mb_strtoupper() is used.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all alphabetic characters converted to uppercase
     */
    public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the string is handed to the case-mapping functions
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, only the requested language is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);
                if ($transliterated !== false) {
                    return $transliterated;
                }

                // transliteration failed: returning "false" would violate the declared
                // "string" return type, so continue with the generic fallback below
            } else {
                \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
9156
9157
    /**
     * Translate characters or replace sub-strings.
     *
     * With an explicit $to, both $from and $to are split via UTF8::str_split(), trimmed to the
     * shorter of the two lists and applied pair-wise. Without $to, a string $from is removed
     * from $str and any other $from is handed to the native strtr() as a replacement map.
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   <p>The string being translated to.</p>
     *
     * @return string
     *                A copy of str in which all occurrences of each character in from
     *                are translated to the corresponding character in to
     */
    public static function strtr(string $str, $from, $to = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // identical source and target: nothing would change
        if ($from === $to) {
            return $str;
        }

        // no explicit target given
        if ($to === \INF) {
            return \is_string($from)
                ? \str_replace($from, '', $str)
                : \strtr($str, $from);
        }

        $fromChars = self::str_split($from);
        $toChars = self::str_split($to);

        // only as many pairs as the shorter list provides
        $pairCount = \min(\count($fromChars), \count($toChars));

        $replacePairs = \array_combine(
            \array_slice($fromChars, 0, $pairCount),
            \array_slice($toChars, 0, $pairCount)
        );

        return \strtr($str, $replacePairs);
    }
9201
9202
    /**
     * Return the width of a string.
     *
     * mb_strwidth() is used when mbstring is available. Otherwise the string is converted to
     * UTF-8 if necessary, every character from the wide ranges listed in the pattern below is
     * counted twice and every remaining character once.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // lazily detect the available extensions
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and remember how many were removed
        $widePattern = '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u';
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace($widePattern, '', $str, -1, $wideCount);

        // wide characters count twice, everything that is left counts once
        return ($wideCount << 1) + self::strlen($narrowOnly, 'UTF-8');
    }
9252
9253
    /**
9254
     * Get part of a string.
9255
     *
9256
     * @see http://php.net/manual/en/function.mb-substr.php
9257
     *
9258
     * @param string $str       <p>The string being checked.</p>
9259
     * @param int    $offset    <p>The first position used in str.</p>
9260
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
9261
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9262
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9263
     *
9264
     * @return false|string
9265
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
9266
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9267
     *                      characters long, <b>FALSE</b> will be returned.
9268
     */
9269 401
    public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
9270
    {
9271 401
        if ($str === '') {
9272 26
            return '';
9273
        }
9274
9275
        // Empty string
9276 396
        if ($length === 0) {
9277 20
            return '';
9278
        }
9279
9280 393
        if ($cleanUtf8 === true) {
9281
            // iconv and mbstring are not tolerant to invalid encoding
9282
            // further, their behaviour is inconsistent with that of PHP's substr
9283 2
            $str = self::clean($str);
9284
        }
9285
9286
        // Whole string
9287 393
        if (!$offset && $length === null) {
9288 40
            return $str;
9289
        }
9290
9291 364
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9292 161
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
9293
        }
9294
9295 364
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9296
            self::checkForSupport();
9297
        }
9298
9299
        //
9300
        // fallback for binary || ascii only
9301
        //
9302
9303
        if (
9304 364
            $encoding === 'CP850'
9305
            ||
9306 364
            $encoding === 'ASCII'
9307
        ) {
9308 2
            return self::substr_in_byte($str, $offset, $length);
9309
        }
9310
9311
        //
9312
        // fallback via mbstring
9313
        //
9314
9315 362
        if (self::$SUPPORT['mbstring'] === true) {
9316 362
            $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
9317 362
            if ($return !== false) {
9318 362
                return $return;
9319
            }
9320
        }
9321
9322
        // otherwise we need the string-length and can't fake it via "2147483647"
9323 4
        $str_length = 0;
9324 4
        if ($offset || $length === null) {
9325 4
            $str_length = self::strlen($str, $encoding);
9326
        }
9327
9328
        // e.g.: invalid chars + mbstring not installed
9329 4
        if ($str_length === false) {
9330
            return false;
9331
        }
9332
9333
        // Empty string
9334 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
9335
            return '';
9336
        }
9337
9338
        // Impossible
9339 4
        if ($offset && $offset > $str_length) {
9340
            // "false" is the php native return type here,
9341
            //  but we optimized this for performance ... see "2147483647" instead of "strlen"
9342
            return '';
9343
        }
9344
9345 4
        if ($length === null) {
9346 4
            $length = (int) $str_length;
9347
        } else {
9348 2
            $length = (int) $length;
9349
        }
9350
9351
        if (
9352 4
            $encoding !== 'UTF-8'
9353
            &&
9354 4
            self::$SUPPORT['mbstring'] === false
9355
        ) {
9356 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
9357
        }
9358
9359
        //
9360
        // fallback via intl
9361
        //
9362
9363
        if (
9364 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
9365
            &&
9366 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
9367
            &&
9368 4
            self::$SUPPORT['intl'] === true
9369
        ) {
9370
            $returnTmp = \grapheme_substr($str, $offset, $length);
9371
            if ($returnTmp !== false) {
9372
                return $returnTmp;
9373
            }
9374
        }
9375
9376
        //
9377
        // fallback via iconv
9378
        //
9379
9380
        if (
9381 4
            $length >= 0 // "iconv_substr()" can't handle negative length
9382
            &&
9383 4
            self::$SUPPORT['iconv'] === true
9384
        ) {
9385
            $returnTmp = \iconv_substr($str, $offset, $length);
9386
            if ($returnTmp !== false) {
9387
                return $returnTmp;
9388
            }
9389
        }
9390
9391
        //
9392
        // fallback for ascii only
9393
        //
9394
9395 4
        if (self::is_ascii($str)) {
9396
            return \substr($str, $offset, $length);
9397
        }
9398
9399
        //
9400
        // fallback via vanilla php
9401
        //
9402
9403
        // split to array, and remove invalid characters
9404 4
        $array = self::split($str);
9405
9406
        // extract relevant part, and join to make sting again
9407 4
        return \implode('', \array_slice($array, $offset, $length));
9408
    }
9409
9410
    /**
     * Compare two strings from an offset, up to length characters.
     *
     * When an offset or a length is given, $str1 is first reduced to the requested
     * slice and $str2 is then shortened to the (character) length of that slice,
     * before both parts are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
    {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            $str1Tmp = self::substr($str1, $offset, $length);
            $str1 = $str1Tmp === false ? '' : (string) $str1Tmp;

            // "self::strlen()" can return false, but "self::substr()" only accepts int|null
            // as length (strict_types): only shorten $str2 when the length is really known.
            $str1Length = self::strlen($str1);
            if ($str1Length !== false) {
                $str2Tmp = self::substr($str2, 0, (int) $str1Length);
                $str2 = $str2Tmp === false ? '' : (string) $str2Tmp;
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2);
        }

        return self::strcmp($str1, $str2);
    }
9454
9455
    /**
     * Count the number of substring occurrences.
     *
     * If an offset and / or a length is given, the haystack is first cut down to
     * that slice (measured in characters of $encoding) and the needle is counted
     * within the slice only.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack   <p>The string to search in.</p>
     * @param string $needle     <p>The substring to search for.</p>
     * @param int    $offset     [optional] <p>The offset where to start counting.</p>
     * @param int    $length     [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int this functions returns an integer or false if there isn't a string
     *                   (empty haystack / needle, or the haystack length can't be determined)
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // normalize the encoding before it is used for the first time,
        // so that the length and the slice below are measured in the same charset
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                // the default length must be counted in the same encoding as the slice itself
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
            $haystack = $haystackTmp === false ? '' : (string) $haystackTmp;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback without mbstring: count the matches of the quoted needle in unicode mode
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
9546
9547
    /**
     * Count the number of substring occurrences, offset and length are handled in bytes.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string being found.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The offset where to start counting
     *                         </p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring. It outputs a warning if the offset plus the length is
     *                         greater than the haystack length.
     *                         </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string (0 if the haystack or the needle is empty)
     */
    public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
    {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            if ($offset || $length !== null) {
                if ($length === null) {
                    // we need the length in bytes here (8-BIT), not the number of characters,
                    // otherwise a multi-byte haystack would be cut off too early
                    $length = (int) \mb_strlen($haystack, 'CP850');
                }

                if (
                    (
                        $length !== 0
                        &&
                        $offset !== 0
                    )
                    &&
                    ($length + $offset) <= 0
                    &&
                    Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
                ) {
                    return false;
                }

                // (string) false === '' ... so a failed slice ends up as empty haystack
                $haystack = (string) self::substr_in_byte($haystack, $offset, $length);
            }

            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        // "substr_count()" accepts null as length only since PHP 8,
        // with strict_types and PHP 7 a null would end in a TypeError
        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
9620
9621
    /**
     * Count how often $substring occurs in $str.
     *
     * The search is case-sensitive by default; pass false as $caseSensitive
     * to case-fold both strings before counting.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             The number of occurrences, 0 if $str or $substring is empty
     */
    public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
    {
        if ($substring === '' || $str === '') {
            return 0;
        }

        // BC-fallback for the api: a non-bool third argument is used as the encoding
        if (\is_bool($caseSensitive) === false) {
            $encoding = (string) $caseSensitive;
        }

        $ignoreCase = !$caseSensitive;
        if ($ignoreCase) {
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        $count = self::substr_count($str, $substring, 0, null, $encoding);

        // "substr_count()" can return false, which is reported as 0 here
        return (int) $count;
    }
9651
9652
    /**
     * Removes an prefix ($needle) from start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string, or the unchanged $haystack if it does not start with $needle
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // "self::strlen()" can return false, but the offset of "self::substr()" must be an int
        // (strict_types): the cast maps false to 0, so the haystack is kept as it is in this case
        $needleLength = (int) self::strlen($needle);

        $haystackTmp = self::substr($haystack, $needleLength);

        return $haystackTmp === false ? '' : (string) $haystackTmp;
    }
9680
9681
    /**
     * Get part of a string, offset and length are processed in bytes.
     *
     * @param string $str    <p>The string being checked.</p>
     * @param int    $offset <p>The first position used in str.</p>
     * @param int    $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to cut: empty input or an explicit length of zero
        if ($str === '' || $length === 0) {
            return '';
        }

        // no offset and no limit: the whole string is requested
        if ($length === null && $offset === 0) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "no limit" is faked via the max. 32-bit integer
        $maxLength = $length ?? 2147483647;

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \substr($str, $offset, $maxLength);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $maxLength, 'CP850'); // 8-BIT
    }
9720
9721
    /**
     * Strip the suffix ($needle) from the end of the string ($haystack), ignoring the case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string, or the unchanged $haystack if it does not end with $needle
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $haystackLength = self::strlen($haystack);
        $needleLength = self::strlen($needle);
        $result = self::substr($haystack, 0, $haystackLength - $needleLength);

        if ($result === false) {
            return '';
        }

        return (string) $result;
    }
9749
9750
    /**
     * Removes an prefix ($needle) from start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string, or the unchanged $haystack if it does not start with $needle
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // "self::strlen()" can return false, but the offset of "self::substr()" must be an int
        // (strict_types): the cast maps false to 0, so the haystack is kept as it is in this case
        $needleLength = (int) self::strlen($needle);

        $haystackTmp = self::substr($haystack, $needleLength);

        return $haystackTmp === false ? '' : (string) $haystackTmp;
    }
9778
9779
    /**
     * Replace text within a portion of a string.
     *
     * If $str is an array, every element is processed on its own (with the given
     * $encoding) and an array is returned; scalar $replacement / $offset / $length
     * values are reused for each element.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str              <p>The input string or an array of stings.</p>
     * @param string|string[] $replacement      <p>The replacement string or an array of stings.</p>
     * @param int|int[]       $offset           <p>
     *                                          If start is positive, the replacing will begin at the start'th offset
     *                                          into string.
     *                                          <br><br>
     *                                          If start is negative, the replacing will begin at the start'th character
     *                                          from the end of string.
     *                                          </p>
     * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
     *                                          portion of string which is to be replaced. If it is negative, it
     *                                          represents the number of characters from the end of string at which to
     *                                          stop replacing. If it is not given, then it will default to strlen(
     *                                          string ); i.e. end the replacing at the end of string. Of course, if
     *                                          length is zero then this function will have the effect of inserting
     *                                          replacement into string at the given start offset.</p>
     * @param string          $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
    {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    // non-integer offsets are replaced by 0
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    if ($valueTmpV2 !== null) {
                        // non-integer lengths are replaced by the number of input strings
                        $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                    } else {
                        $valueTmpV2 = 0;
                    }
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, the closure is needed to keep the requested encoding for every element
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single input string can only use a single replacement: take the first one
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if ($str === '') {
            return $replacement;
        }

        // ascii only: one byte per character, so the native function can be used
        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = self::strlen($str, $encoding);
            if ($string_length === false) {
                // same result as in the fallback below, if the length can't be determined
                return '';
            }

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // clamp the length, so that the replaced part ends within the string
            if ($length < 0) {
                $length = \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // (string) false === '' ... so a failed "substr()" adds nothing to the result
            $before = (string) self::substr($str, 0, $offset, $encoding);
            $after = (string) self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);

            return $before . $replacement . $after;
        }

        // fallback without mbstring: split both strings into characters and splice the arrays
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
9912
9913
    /**
     * Strip the suffix ($needle) from the end of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string, or the unchanged $haystack if it does not end with $needle
     */
    public static function substr_right(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_ends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $haystackLength = self::strlen($haystack);
        $needleLength = self::strlen($needle);
        $result = self::substr($haystack, 0, $haystackLength - $needleLength);

        if ($result === false) {
            return '';
        }

        return (string) $result;
    }
9941
9942
    /**
     * Returns a case swapped version of the string.
     *
     * The result is built via a byte-wise XOR of the lower-cased, the upper-cased
     * and the original string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // remove invalid characters, before the case of the string is changed
            $str = self::clean($str);
        }

        $lower = self::strtolower($str, $encoding);
        $upper = self::strtoupper($str, $encoding);

        // lower ^ upper ^ original: the byte-wise XOR cancels out the case the character currently has
        return (string) ($lower ^ $upper ^ $str);
    }
9969
9970
    /**
     * Checks whether "mb_strlen()" or "iconv()" exists although the matching
     * php-extension ("mbstring" / "iconv") is not loaded, i.e. the function
     * is provided by something else, e.g. by a polyfill.
     *
     * @return bool
     *              <strong>true</strong> if at least one of these functions exists without its extension,
     *              <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        // "mb_strlen()" without the "mbstring" extension
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');

        // "iconv()" without the "iconv" extension
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
9993
9994
    /**
     * Replace every tab character in the string by $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces used per tab. Default: 4</p>
     *
     * @return string
     *                The string with every tab replaced by spaces
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $spaces, $str);
    }
10004
10005
    /**
     * Title-case the string: the work is delegated to "UTF8::str_titleize()",
     * after the encoding was normalized.
     *
     * @see UTF8::str_titleize()
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // "UTF-8" and "CP850" are used as they are, every other encoding is normalized first
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $titleCased = self::str_titleize($str, null, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength, false);

        return $titleCased;
    }
10025
10026
    /**
     * Deprecated alias for "UTF8::to_ascii()"; all arguments are forwarded unchanged.
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Forwarded as the "$unknown" argument of "to_ascii()".</p>
     * @param bool   $strict    [optional] <p>Forwarded as the "$strict" argument of "to_ascii()".</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
10043
10044
    /**
     * Deprecated alias for "UTF8::to_iso8859()"; the argument is forwarded unchanged.
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
10059
10060
    /**
     * Deprecated alias for "UTF8::to_latin1()"; the argument is forwarded unchanged.
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
10075
10076
    /**
     * Deprecated alias for "UTF8::to_utf8()"; the argument is forwarded unchanged
     * (html-entity decoding stays at the default of "to_utf8()").
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
10091
10092
    /**
     * Convert a string into ASCII.
     *
     * The string is cleaned via "UTF8::clean()" first. Every character that is still non-ASCII afterwards
     * is decoded to its code point and looked up in a transliteration table (one table per block of
     * 256 code points, loaded on demand via "getDataIfExists()"); characters without an entry are
     * replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // cache of the transliteration tables, keyed by bank (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if ($strict === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

                // "transliterator_transliterate()" returns false on failure: keep the cleaned
                // string in that case and let the table based fallback below handle it
                if (\is_string($transliterated)) {
                    $str = $transliterated;

                    // check again, if we only have ASCII, now ...
                    if (self::is_ascii($str) === true) {
                        return $str;
                    }
                }
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];

        foreach ($chars as $index => $c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // the lead byte tells how many bytes the sequence has and which marker bits to strip
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $sequenceLength = 2;
                $leadOffset = 192;
            } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
                $sequenceLength = 3;
                $leadOffset = 224;
            } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
                $sequenceLength = 4;
                $leadOffset = 240;
            } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
                $sequenceLength = 5;
                $leadOffset = 248;
            } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
                $sequenceLength = 6;
                $leadOffset = 252;
            } else {
                // stray continuation byte (128-191) or an impossible lead byte (254, 255)
                $chars[$index] = $unknown;

                continue;
            }

            // truncated sequence: there is no code point to look up
            if (!isset($c[$sequenceLength - 1])) {
                $chars[$index] = $unknown;

                continue;
            }

            // the code point is computed from scratch for every character, so an undecodable
            // character can never inherit the value of the previous one
            $ord = $ordC0 - $leadOffset;
            for ($i = 1; $i < $sequenceLength; ++$i) {
                // every continuation byte contributes its lower 6 bits
                $ord = $ord * 64 + (self::$ORD[$c[$i]] - 128);
            }

            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember missing tables as well, so that they are requested only once
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            $chars[$index] = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }

        return \implode('', $chars);
    }
10252
10253
    /**
     * Converts a value into a boolean, based on its string representation.
     *
     * Rules, in this order:
     * - an empty string is false
     * - "true", "1", "on", "yes" (case-insensitive) are true
     * - "false", "0", "off", "no" (case-insensitive) are false
     * - a numeric string is true only if the number is greater than zero
     * - anything else is true if something is left after "UTF8::trim()"
     *
     * @param mixed $str <p>The value; it is cast to a string before it is evaluated.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keyword = \strtolower($str);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        if (\is_numeric($str)) {
            return (float) $str > 0;
        }

        return (bool) self::trim($str);
    }
10292
10293
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Only ASCII letters, digits, dots, hyphens and the fallback char survive; whitespace is turned
     * into the fallback char, runs of the fallback char are collapsed and the fallback char is
     * trimmed from both ends of the result.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate [optional] <p>Run the string through "UTF8::str_transliterate()" first.
     *                                  No transliteration, conversion etc. is done by default - unsafe
     *                                  characters are simply removed.</p>
     * @param string $fallback_char     [optional] <p>Replacement for whitespace. An empty string is allowed,
     *                                  whitespace is removed then.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                            // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty fallback char would produce the invalid pattern "/[]+/": "preg_replace()" then
        // fails for the whole call and the result would always be an empty string.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
10328
10329
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept); everything else is cast
     * to a string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) !== true) {
            $string = (string) $str;

            return $string === '' ? '' : self::utf8_decode($string);
        }

        $converted = [];
        foreach ($str as $key => $value) {
            $converted[$key] = self::to_iso8859($value);
        }

        return $converted;
    }
10353
10354
    /**
     * Alias for "UTF8::to_iso8859()"; the argument is forwarded unchanged.
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
10367
10368
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays are converted element by element (recursively, keys are kept)
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $byteLength = self::strlen_in_byte($str);
        $utf8 = '';

        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byteLength; ++$pos) {
            $lead = $str[$pos];

            // 0x00-0x7F: it doesn't need conversion
            if ($lead < "\x80") {
                $utf8 .= $lead;

                continue;
            }

            // number of continuation bytes the lead byte announces
            if ($lead >= "\xC0" && $lead <= "\xDF") { // looks like 2 bytes UTF8
                $tailLength = 1;
            } elseif ($lead >= "\xE0" && $lead <= "\xEF") { // looks like 3 bytes UTF8
                $tailLength = 2;
            } elseif ($lead >= "\xF0" && $lead <= "\xF7") { // looks like 4 bytes UTF8
                $tailLength = 3;
            } else { // 0x80-0xBF or 0xF8-0xFF: doesn't look like UTF8, but should be converted
                $tailLength = 0;
            }

            // collect the announced continuation bytes, every one has to be in 0x80-0xBF
            $sequence = $lead;
            $looksLikeUtf8 = $tailLength > 0;
            for ($offset = 1; $offset <= $tailLength; ++$offset) {
                $next = $pos + $offset >= $byteLength ? "\x00" : $str[$pos + $offset];

                if ($next < "\x80" || $next > "\xBF") {
                    $looksLikeUtf8 = false;

                    break;
                }

                $sequence .= $next;
            }

            if ($looksLikeUtf8) { // yeah, almost sure it's UTF8 already
                $utf8 .= $sequence;
                $pos += $tailLength;
            } else { // not valid UTF8 - convert the lead byte only
                $utf8 .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences
        $utf8 = \preg_replace_callback(
            '/\\\\u([0-9a-f]{4})/i',
            function ($match) {
                // always fallback via symfony polyfill
                return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
            },
            $utf8
        );

        // decode html-entities, if requested
        if ($decodeHtmlEntityToUtf8 === true) {
            $utf8 = self::html_entity_decode($utf8);
        }

        return $utf8;
    }
10472
10473
    /**
     * Converts a single byte, that is not part of a valid UTF-8 sequence, into UTF-8.
     *
     * Bytes listed in the "win1252_to_utf8" table are replaced by their table entry; every other byte
     * is encoded as the two byte UTF-8 sequence of the code point with the same value.
     *
     * @param int|string $input <p>The byte to convert; it is used as key of the "ord" table
     *                          (presumably a one byte string - confirm against the data file).</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // the lookup tables are loaded on first use only
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Upper two bits go into the lead byte, lower six bits into the continuation byte.
        // The shift replaces "$ordC1 / 64": a fractional float as array key is truncated
        // implicitly, which is deprecated since PHP 8.1.
        $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
        // bitwise operators on two strings work on the bytes, this is intended here
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
10506
10507
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars [optional] <p>Optional characters to be stripped. If omitted (or empty),
     *                      all characters of the unicode categories "Z" (separators) and "C" (control, format,
     *                      ...) are stripped.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is falsy in PHP, but it is a valid list of characters to strip
        $useDefaultChars = $chars === \INF || (!$chars && $chars !== '0');

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultChars) {
            $pattern = '^[\pZ\pC]+|[\pZ\pC]+$';
        } else {
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
10536
10537
    /**
     * Makes string's first char uppercase.
     *
     * The string is split after its first character; only that character is passed to
     * "UTF8::strtoupper()", the remainder is appended unchanged.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // everything after the first character stays as it is
        $remainder = self::substr($str, 1, null, $encoding);
        if ($remainder === false) {
            $remainder = '';
        }

        $firstChar = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength) . $remainder;
    }
10571
10572
    /**
     * Alias for "UTF8::ucfirst()"; the three arguments are forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
10587
10588
    /**
     * Uppercase for all words in the string.
     *
     * Pure ASCII input without exceptions and without an extra charlist is handled by PHP's native
     * "ucwords()"; everything else is split via "UTF8::str_to_words()" and every part that is not
     * listed in $exceptions is passed to "UTF8::ucfirst()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // compare with '' explicitly: the string "0" is falsy, but it is not empty
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // the native function knows neither exceptions nor additional word characters
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $newWords = [];

        foreach ($words as $word) {
            // skip empty parts only, a part like "0" has to stay in the result
            if ($word === '') {
                continue;
            }

            // with an empty exception list "in_array()" is always false, so every word is converted
            if (!\in_array($word, $exceptions, true)) {
                $word = self::ucfirst($word, $encoding);
            }

            $newWords[] = $word;
        }

        return \implode('', $newWords);
    }
10656
10657
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // rewrite "%uXXXX" escapes as numeric html-entities, the loop below decodes them
        $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscape, $str)) {
            $str = (string) \preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // one pass: to UTF-8 -> decode html-entities -> urldecode -> fix simple UTF-8 errors;
        // with $multi_decode the pass is repeated until the string does not change anymore
        do {
            $previous = $str;

            $asUtf8 = self::to_utf8($str);
            $withoutEntities = self::html_entity_decode($asUtf8, $flags);
            $str = self::fix_simple_utf8(\urldecode($withoutEntities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
10704
10705
    /**
10706
     * Returns an array that maps "urlencoded" Windows-1252 characters to their UTF-8 counterparts.
10707
     *
10708
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
10709
     *
10710
     * @return string[]
10711
     */
10712 2
    public static function urldecode_fix_win1252_chars(): array
10713
    {
10714
        return [
10715 2
            '%20' => ' ',
10716
            '%21' => '!',
10717
            '%22' => '"',
10718
            '%23' => '#',
10719
            '%24' => '$',
10720
            '%25' => '%',
10721
            '%26' => '&',
10722
            '%27' => "'",
10723
            '%28' => '(',
10724
            '%29' => ')',
10725
            '%2A' => '*',
10726
            '%2B' => '+',
10727
            '%2C' => ',',
10728
            '%2D' => '-',
10729
            '%2E' => '.',
10730
            '%2F' => '/',
10731
            '%30' => '0',
10732
            '%31' => '1',
10733
            '%32' => '2',
10734
            '%33' => '3',
10735
            '%34' => '4',
10736
            '%35' => '5',
10737
            '%36' => '6',
10738
            '%37' => '7',
10739
            '%38' => '8',
10740
            '%39' => '9',
10741
            '%3A' => ':',
10742
            '%3B' => ';',
10743
            '%3C' => '<',
10744
            '%3D' => '=',
10745
            '%3E' => '>',
10746
            '%3F' => '?',
10747
            '%40' => '@',
10748
            '%41' => 'A',
10749
            '%42' => 'B',
10750
            '%43' => 'C',
10751
            '%44' => 'D',
10752
            '%45' => 'E',
10753
            '%46' => 'F',
10754
            '%47' => 'G',
10755
            '%48' => 'H',
10756
            '%49' => 'I',
10757
            '%4A' => 'J',
10758
            '%4B' => 'K',
10759
            '%4C' => 'L',
10760
            '%4D' => 'M',
10761
            '%4E' => 'N',
10762
            '%4F' => 'O',
10763
            '%50' => 'P',
10764
            '%51' => 'Q',
10765
            '%52' => 'R',
10766
            '%53' => 'S',
10767
            '%54' => 'T',
10768
            '%55' => 'U',
10769
            '%56' => 'V',
10770
            '%57' => 'W',
10771
            '%58' => 'X',
10772
            '%59' => 'Y',
10773
            '%5A' => 'Z',
10774
            '%5B' => '[',
10775
            '%5C' => '\\',
10776
            '%5D' => ']',
10777
            '%5E' => '^',
10778
            '%5F' => '_',
10779
            '%60' => '`',
10780
            '%61' => 'a',
10781
            '%62' => 'b',
10782
            '%63' => 'c',
10783
            '%64' => 'd',
10784
            '%65' => 'e',
10785
            '%66' => 'f',
10786
            '%67' => 'g',
10787
            '%68' => 'h',
10788
            '%69' => 'i',
10789
            '%6A' => 'j',
10790
            '%6B' => 'k',
10791
            '%6C' => 'l',
10792
            '%6D' => 'm',
10793
            '%6E' => 'n',
10794
            '%6F' => 'o',
10795
            '%70' => 'p',
10796
            '%71' => 'q',
10797
            '%72' => 'r',
10798
            '%73' => 's',
10799
            '%74' => 't',
10800
            '%75' => 'u',
10801
            '%76' => 'v',
10802
            '%77' => 'w',
10803
            '%78' => 'x',
10804
            '%79' => 'y',
10805
            '%7A' => 'z',
10806
            '%7B' => '{',
10807
            '%7C' => '|',
10808
            '%7D' => '}',
10809
            '%7E' => '~',
10810
            '%7F' => '',
10811
            '%80' => '`',
10812
            '%81' => '',
10813
            '%82' => '‚',
10814
            '%83' => 'ƒ',
10815
            '%84' => '„',
10816
            '%85' => '…',
10817
            '%86' => '†',
10818
            '%87' => '‡',
10819
            '%88' => 'ˆ',
10820
            '%89' => '‰',
10821
            '%8A' => 'Š',
10822
            '%8B' => '‹',
10823
            '%8C' => 'Œ',
10824
            '%8D' => '',
10825
            '%8E' => 'Ž',
10826
            '%8F' => '',
10827
            '%90' => '',
10828
            '%91' => '‘',
10829
            '%92' => '’',
10830
            '%93' => '“',
10831
            '%94' => '”',
10832
            '%95' => '•',
10833
            '%96' => '–',
10834
            '%97' => '—',
10835
            '%98' => '˜',
10836
            '%99' => '™',
10837
            '%9A' => 'š',
10838
            '%9B' => '›',
10839
            '%9C' => 'œ',
10840
            '%9D' => '',
10841
            '%9E' => 'ž',
10842
            '%9F' => 'Ÿ',
10843
            '%A0' => '',
10844
            '%A1' => '¡',
10845
            '%A2' => '¢',
10846
            '%A3' => '£',
10847
            '%A4' => '¤',
10848
            '%A5' => '¥',
10849
            '%A6' => '¦',
10850
            '%A7' => '§',
10851
            '%A8' => '¨',
10852
            '%A9' => '©',
10853
            '%AA' => 'ª',
10854
            '%AB' => '«',
10855
            '%AC' => '¬',
10856
            '%AD' => '',
10857
            '%AE' => '®',
10858
            '%AF' => '¯',
10859
            '%B0' => '°',
10860
            '%B1' => '±',
10861
            '%B2' => '²',
10862
            '%B3' => '³',
10863
            '%B4' => '´',
10864
            '%B5' => 'µ',
10865
            '%B6' => '¶',
10866
            '%B7' => '·',
10867
            '%B8' => '¸',
10868
            '%B9' => '¹',
10869
            '%BA' => 'º',
10870
            '%BB' => '»',
10871
            '%BC' => '¼',
10872
            '%BD' => '½',
10873
            '%BE' => '¾',
10874
            '%BF' => '¿',
10875
            '%C0' => 'À',
10876
            '%C1' => 'Á',
10877
            '%C2' => 'Â',
10878
            '%C3' => 'Ã',
10879
            '%C4' => 'Ä',
10880
            '%C5' => 'Å',
10881
            '%C6' => 'Æ',
10882
            '%C7' => 'Ç',
10883
            '%C8' => 'È',
10884
            '%C9' => 'É',
10885
            '%CA' => 'Ê',
10886
            '%CB' => 'Ë',
10887
            '%CC' => 'Ì',
10888
            '%CD' => 'Í',
10889
            '%CE' => 'Î',
10890
            '%CF' => 'Ï',
10891
            '%D0' => 'Ð',
10892
            '%D1' => 'Ñ',
10893
            '%D2' => 'Ò',
10894
            '%D3' => 'Ó',
10895
            '%D4' => 'Ô',
10896
            '%D5' => 'Õ',
10897
            '%D6' => 'Ö',
10898
            '%D7' => '×',
10899
            '%D8' => 'Ø',
10900
            '%D9' => 'Ù',
10901
            '%DA' => 'Ú',
10902
            '%DB' => 'Û',
10903
            '%DC' => 'Ü',
10904
            '%DD' => 'Ý',
10905
            '%DE' => 'Þ',
10906
            '%DF' => 'ß',
10907
            '%E0' => 'à',
10908
            '%E1' => 'á',
10909
            '%E2' => 'â',
10910
            '%E3' => 'ã',
10911
            '%E4' => 'ä',
10912
            '%E5' => 'å',
10913
            '%E6' => 'æ',
10914
            '%E7' => 'ç',
10915
            '%E8' => 'è',
10916
            '%E9' => 'é',
10917
            '%EA' => 'ê',
10918
            '%EB' => 'ë',
10919
            '%EC' => 'ì',
10920
            '%ED' => 'í',
10921
            '%EE' => 'î',
10922
            '%EF' => 'ï',
10923
            '%F0' => 'ð',
10924
            '%F1' => 'ñ',
10925
            '%F2' => 'ò',
10926
            '%F3' => 'ó',
10927
            '%F4' => 'ô',
10928
            '%F5' => 'õ',
10929
            '%F6' => 'ö',
10930
            '%F7' => '÷',
10931
            '%F8' => 'ø',
10932
            '%F9' => 'ù',
10933
            '%FA' => 'ú',
10934
            '%FB' => 'û',
10935
            '%FC' => 'ü',
10936
            '%FD' => 'ý',
10937
            '%FE' => 'þ',
10938
            '%FF' => 'ÿ',
10939
        ];
10940
    }
10941
10942
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * <p>Every character that cannot be stored in a single byte is replaced by "?".</p>
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the string as it was right before the byte-wise decoding
     *                              is returned whenever the decoded result is not shorter
     *                              (measured with self::strlen()) than that string.
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        static $UTF8_TO_WIN1252_KEYS_CACHE = null;
        static $UTF8_TO_WIN1252_VALUES_CACHE = null;

        // build the search / replace lists only once per request
        if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        /** @noinspection PhpInternalEntityUsedInspection */
        $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // save for later comparison
        $str_backup = $str;
        $len = self::strlen_in_byte($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';

        // $i is the read position, $j the write position; the string is rewritten in place
        // and $j never overtakes $i because every sequence shrinks to exactly one byte.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // dispatch on the high nibble of the current byte
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // lead byte of a 2-byte sequence: a truncated sequence at the very end
                    // has no continuation byte, so do not read past the end of the string
                    if ($i + 1 >= $len) {
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // lead byte of a 4-byte sequence: skip one byte more than the 3-byte case
                    ++$i;
                // no break
                case "\xE0":
                    // 3-byte (or 4-byte) sequence: never fits into one byte
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    // any other byte is copied as it is
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes hold decoded output
        $return = self::substr_in_byte($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
11028
11029
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * <p>After the native conversion, the "win1252_to_utf8" table is applied to the result.</p>
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>The encoded string, or an empty string if the native conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        // search / replace lists derived from the table, built once per request
        static $searchList = null;
        static $replaceList = null;

        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        if ($encoded === false) {
            return '';
        }

        // fast path: without a "\xC2" byte the table replacement is skipped
        // (presumably every key of the table starts with that byte - TODO confirm against the data file)
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        if ($searchList === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $searchList = \array_keys(self::$WIN1252_TO_UTF8);
            $replaceList = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($searchList, $replaceList, $encoded);
    }
11068
11069
    /**
     * Fixes utf8-win1252 chars by delegating to UTF8::fix_simple_utf8().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @see UTF8::fix_simple_utf8()
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        // kept only for backward compatibility: the work is done by the replacement method
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
11082
11083
    /**
     * Returns the table of all known UTF-8 whitespace characters.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E.
     *
     * @return string[]
     *                 The whitespace characters as values, keyed by the type of whitespace
     *                 as defined in the URL above
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
11098
11099
    /**
     * Limit the number of words in a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The maximum number of words to keep; values below 1 yield an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part, appended only when the string was shortened.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // leading whitespace followed by at most $limit "word + trailing whitespace" groups
        $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // the string was shortened only if there is a match that does not cover the whole input
        $wasShortened = isset($found[0]) && self::strlen($str) !== self::strlen($found[0]);
        if (!$wasShortened) {
            return $str;
        }

        return self::rtrim($found[0]) . $strAddOn;
    }
11130
11131
    /**
     * Wraps a string to a given number of characters
     *
     * <p>
     * The string is reduced to a one-byte-per-character mask, the native wordwrap() is run
     * on that mask, and the break positions it reports are applied to the real characters.
     * </p>
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string the given string wrapped at the specified column,
     *                or an empty string if $str or $break is empty
     */
    public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
    {
        if ($str === '' || $break === '') {
            return '';
        }

        // $break is not empty here, so explode() always returns an array of strings
        $segments = \explode($break, $str);

        $mask = '';   // one byte per element of $pieces: '#' = existing break, ' ' = space, '?' = anything else
        $pieces = []; // the real characters (and existing breaks), in order
        foreach ($segments as $index => $segment) {
            if ($index > 0) {
                $pieces[] = $break;
                $mask .= '#';
            }

            foreach (self::split($segment) as $char) {
                $pieces[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }
        unset($segments);

        // the mask contains only single-byte characters, so the native function counts correctly
        $mask = \wordwrap($mask, $width, '#', $cut);

        $strReturn = '';
        $cursor = 0;     // next element of $pieces that has not been emitted yet
        $maskPos = -1;   // last position of the mask that has been handled
        $breakPos = -1;  // position of the current '#' in the mask

        while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
            // emit everything that lies between the previous break and this one
            for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
                $strReturn .= $pieces[$cursor];
                unset($pieces[$cursor]);
                ++$cursor;
            }

            // an existing break or a space at the break position is consumed;
            // any other character is kept for the next line
            if ($break === $pieces[$cursor] || $pieces[$cursor] === ' ') {
                unset($pieces[$cursor]);
                ++$cursor;
            }

            $strReturn .= $break;
        }

        // whatever is left belongs to the last line
        return $strReturn . \implode('', $pieces);
    }
11200
11201
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * <p>
     * The input is split at "\r\n", "\r" and "\n"; every resulting line is wrapped on its own
     * with the multi-byte aware UTF8::wordwrap() and terminated by "\n".
     * </p>
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width used for every line.</p>
     *
     * @return string <p>The wrapped lines, each one (including the last) followed by "\n".</p>
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            // the split failed, so there is nothing to wrap
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            // use the UTF-8 aware implementation: the native wordwrap() counts bytes,
            // which wraps multi-byte text too early
            $wrapped .= self::wordwrap($line, $limit);
            $wrapped .= "\n";
        }

        return $wrapped;
    }
11225
11226
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @return string[] the White Space Characters as values, keyed by their numeric code point
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
11235
}
11236