Passed
Push — master ( 00d063...84c19e )
by Lars
04:45
created

UTF8::removeBOM()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
ccs 0
cts 1
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Bom => Byte-Length
     *
     * Every byte order mark is listed twice: once as its raw bytes and once as
     * the text that results when those raw bytes were read as "WINDOWS-1252"
     * (one char may then occupy more than one byte, hence the larger length).
     *
     * The mis-decoded UTF-8 BOM is written with byte escapes on purpose: the
     * literal characters look like a BOM to many tools and are easily stripped,
     * which would leave an empty-string key behind.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $UTF8_MSWORD;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $BROKEN_UTF8_FIX;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $WIN1252_TO_UTF8;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $CHR;
219
220
    /**
     * Creates an instance and triggers the environment detection
     * via UTF8::checkForSupport().
     */
    public function __construct()
    {
        self::checkForSupport();
    }
227
228
    /**
     * Fetch a single character by position, similar to $str[1] but delegated
     * to UTF8::substr().
     *
     * @param string $str <p>A UTF-8 string.</p>
     * @param int    $pos <p>The position of character to return.</p>
     *
     * @return string single multi-byte character; an empty string if $str is
     *                empty or $pos is negative
     */
    public static function access(string $str, int $pos): string
    {
        // nothing to look up in an empty string or in front of its start
        if ($str === '' || $pos < 0) {
            return '';
        }

        $char = self::substr($str, $pos, 1);

        return (string) $char;
    }
248
249
    /**
     * Returns the string with the UTF-8 BOM in front of it.
     *
     * INFO: If UTF8::string_has_bom() already reports a BOM, the input string
     *       is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
266
267
    /**
     * Adds the specified amount of left and right padding to the given string.
     * The default character used is a space.
     *
     * The padding itself is produced by UTF8::str_pad(); this method only works
     * out the target length and the pad type:
     * - only $left is non-zero  => STR_PAD_LEFT
     * - only $right is non-zero => STR_PAD_RIGHT
     * - otherwise               => STR_PAD_BOTH
     *
     * @param string $str
     * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
     * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
     * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    private static function apply_padding(
        string $str,
        int $left = 0,
        int $right = 0,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // A zero side adds nothing to the sum, so one formula covers all cases.
        $length = $left + $right + self::strlen($str, $encoding);

        if ($left && !$right) {
            $type = \STR_PAD_LEFT;
        } elseif ($right && !$left) {
            $type = \STR_PAD_RIGHT;
        } else {
            $type = \STR_PAD_BOTH;
        }

        return self::str_pad($str, $length, $padStr, $type, $encoding);
    }
299
300
    /**
     * Changes the case of all string keys in an array.
     *
     * Keys are converted with UTF8::strtolower() / UTF8::strtoupper().
     * Non-string (integer) keys have no case and are copied as they are.
     * If several keys become identical after the conversion, the value of the
     * last one wins.
     *
     * @param array $array <p>The array to work on</p>
     * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                     or <strong>CASE_LOWER</strong> (default); any other
     *                     value is treated as <strong>CASE_LOWER</strong></p>
     *
     * @return array an array with its keys lower or uppercased, values untouched
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER): array
    {
        // decide once: everything that is not CASE_UPPER falls back to lower case
        $toUpper = ($case === \CASE_UPPER);

        $return = [];
        foreach ($array as $key => $value) {
            if (\is_string($key) === true) {
                $key = $toUpper ? self::strtoupper($key) : self::strtolower($key);
            }

            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
     * Extracts the text enclosed by $start and $end.
     *
     * An empty string is returned if $start is not found (searching from
     * $offset), if $end is not found after $start, if $end follows $start
     * immediately, or if UTF8::substr() fails.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
    {
        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        // first character behind the start delimiter
        $from = $startPos + self::strlen($start, $encoding);

        $endPos = self::strpos($str, $end, $from, $encoding);
        if ($endPos === false || $endPos === $from) {
            return '';
        }

        $between = self::substr($str, $from, $endPos - $from, $encoding);

        return $between === false ? '' : $between;
    }
371
372
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
     *
     * The digits are converted to hexadecimal with base_convert() and then
     * packed into bytes. Leading zero bits are dropped by that conversion, and
     * base_convert() may lose precision on very long inputs.
     *
     * @param mixed $bin <p>String made of "1" and "0" digits; anything without
     *                   an offset 0 (e.g. an empty string) yields ''.</p>
     *
     * @return string the decoded bytes, or an empty string for empty or
     *                all-zero input
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $hex = \base_convert($bin, 2, 16);
        if ($hex === '0') {
            return '';
        }

        // pack('H*') consumes two hex digits per byte, high nibble first. With an
        // odd number of digits the LAST digit would become a high nibble
        // ("a" => "\xa0"), so left-pad to keep the numeric value ("0a" => "\x0a").
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
392
393
    /**
     * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        return "\xef\xbb\xbf";
    }
404
405
    /**
     * @alias of UTF8::chr_map()
     *
     * Forwards both arguments unchanged and returns whatever UTF8::chr_map()
     * returns.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
419
420
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * Implemented as a one-character UTF8::substr() whose result is cast to
     * string.
     *
     * @param string $str
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
433
434
    /**
     * Splits the string into its characters via UTF8::str_split() with a
     * chunk length of 1.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $chars = self::str_split($str, 1);

        return $chars;
    }
445
446
    /**
     * This method will auto-detect your server environment for UTF-8 support.
     *
     * The findings are stored in self::$SUPPORT; the detection runs only once,
     * later calls return immediately.
     *
     * INFO: You don't need to run it manually, it will be triggered if it's needed.
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        // set the marker first, so the detection is never started twice
        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        $intlLoaded = self::intl_loaded();
        self::$SUPPORT['intl'] = $intlLoaded;
        self::$SUPPORT['intl__transliterator_list_ids'] = [];

        // the transliterator ids are only collected if intl provides them
        if ($intlLoaded === true && \function_exists('transliterator_list_ids') === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    }
494
495
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * The code point is normalised to an integer first, so "8364" and 8364 give
     * the same result. Code points above 127 are built by IntlChar::chr() when
     * self::$SUPPORT['intlChar'] is true, otherwise (and for all code points up
     * to 127) the bytes are taken from the self::$CHR lookup table
     * (NOTE(review): the table is loaded via getData('chr') and is presumably a
     * map of byte value 0-255 => one-byte string - confirm against the data file).
     * Successful results are cached per code point and encoding.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     *                     (non-numeric input, code points outside 0 - 0x10FFFF,
     *                     or a code point the available backend cannot build)
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Normalise the code point up front. IntlChar::chr() treats a string
        // argument as a *character*, not as a number, so a numeric string must
        // never reach it uncast.
        if (\is_string($code_point) === true) {
            $codePointAsInt = (int) $code_point;
            if ((string) $codePointAsInt !== $code_point) {
                // '', 'abc', '065', ' 65', '6.5', ... are not plain integers
                return null;
            }
            $code_point = $codePointAsInt;
        } elseif (\is_float($code_point) === true) {
            $code_point = (int) $code_point;
        } elseif (\is_int($code_point) === false) {
            return null;
        }

        // outside of the Unicode code space
        if ($code_point < 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // use "simple"-char only until "\x80", IntlChar (if available) above that
        if ($code_point > 0x7F && self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
            if ($chr === null) {
                return null;
            }
        } else {
            if (self::$CHR === null) {
                $chrTmp = self::getData('chr');
                if ($chrTmp) {
                    self::$CHR = (array) $chrTmp;
                }
            }

            if (self::$CHR === null) {
                // lookup table could not be loaded
                return null;
            }

            // standard UTF-8 bit layout: 1 - 4 bytes depending on the code point
            if ($code_point <= 0x7F) {
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
594
595
    /**
     * Applies callback to all characters of a string.
     *
     * The string is split with UTF8::split() and the pieces are passed to
     * array_map().
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        return \array_map($callback, self::split($str));
    }
609
610
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character; empty for an
     *               empty input string
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $chars = self::split($str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // native strlen() is used unless the support table reports mbstring
        // function overloading; then UTF8::strlen_in_byte() measures instead
        $byteCounter = '\strlen';
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byteCounter = function ($data) {
                return self::strlen_in_byte($data);
            };
        }

        return \array_map($byteCounter, $chars);
    }
645
646
    /**
     * Get a decimal code representation of a specific character.
     *
     * Only the first UTF-8 sequence of $char is decoded. The lead byte decides
     * how many bytes belong to the sequence (1 - 4); the payload bits of the
     * continuation bytes are appended six at a time.
     *
     * Bytes are read with the native ord(), because the bit masks below need
     * raw byte values. Continuation bytes that are missing from truncated
     * input count as zero bits instead of triggering an
     * "uninitialized string offset" error.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int the code point, or 0 for an empty string
     */
    public static function chr_to_decimal(string $char): int
    {
        if ($char === '') {
            return 0;
        }

        $code = \ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 1; $i < $bytes; ++$i) {
            // 10xxxxxx
            $byte = isset($char[$i]) ? \ord($char[$i]) : 0;
            $code = ($code << 6) + ($byte & ~0x80);
        }

        return $code;
    }
684
685
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty input returns an empty string. The literal entity "&#0;" is
     * replaced by an empty string before it is handed to UTF8::ord().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $input = ($char === '&#0;') ? '' : $char;

        return self::int_to_hex(self::ord($input), $pfix);
    }
705
706
    /**
     * alias for "UTF8::chr_to_decimal()"
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr <p>Passed on unchanged.</p>
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
719
720
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending.
     *
     * The chunks come from UTF8::split(); $end is placed between the chunks
     * (implode), so nothing is appended after the last chunk.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::split($body, $chunklen);

        return \implode($end, $chunks);
    }
733
734
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in this order: replace diamond question mark,
     * remove invisible characters, normalize whitespace, normalize MS Word
     * characters, remove BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
     *                                              invisible characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences, the other two groups
        // catch single stray bytes; replacing with '$1' keeps only group 1.
        $utf8Pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($utf8Pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
801
802
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * The input is cast to string, run through UTF8::fix_simple_utf8() and
     * then through UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        // init
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // ... no MS Word normalization
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
836
837
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is split with UTF8::split() first; an array is used as given.
     * Every element is then mapped through UTF8::ord() and, for $u_style,
     * additionally through UTF8::int_to_hex().
     *
     * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
     *                                    default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::split($arg) : $arg;

        $codePoints = \array_map([self::class, 'ord'], $chars);

        if (\count($codePoints) === 0) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
881
882
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space.
     *
     * Every match of "[[:space:]]+" is replaced through UTF8::regex_replace(),
     * afterwards the result is passed to UTF8::trim().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

        return self::trim($condensed);
    }
897
898
    /**
     * Counts how often every character occurs in a string.
     *
     * The string is split via UTF8::split() (chunk argument 1) and the resulting
     * pieces are tallied with array_count_values().
     *
     * @param string $str       <p>The input string.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string
     *                          (handed through to UTF8::split()).</p>
     *
     * @return int[] an associative array with the characters as keys and
     *               their number of occurrences as values
     */
    public static function count_chars(string $str, bool $cleanUtf8 = false): array
    {
        $chars = self::split($str, 1, $cleanUtf8);

        return \array_count_values($chars);
    }
911
912
    /**
     * Strips "@media ... { ... }" blocks (css media-queries) from a string.
     *
     * The match is case-insensitive, spans multiple lines and is non-greedy,
     * so every media-query block is removed on its own.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string the input without media-query blocks
     *                (an empty string if preg_replace() fails and returns null)
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

        $withoutMediaQueries = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $withoutMediaQueries;
    }
927
928
    /**
     * Tells whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
938
939
    /**
     * Converts a decimal code point into the matching UTF-8 character.
     *
     * The value is wrapped into a numeric html-entity ("&#<int>;") which is then
     * decoded by UTF8::html_entity_decode() with ENT_QUOTES | ENT_HTML5.
     *
     * @param mixed $int <p>The decimal code point (anything that can be concatenated into the entity).</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numericEntity = '&#' . $int . ';';

        return self::html_entity_decode($numericEntity, \ENT_QUOTES | \ENT_HTML5);
    }
950
951
    /**
     * Decodes a MIME header field.
     *
     * With iconv support the work is done by iconv_mime_decode() (continuing on
     * errors); otherwise the string is passed to UTF8::encode() for non UTF-8
     * target encodings and then decoded by mb_decode_mimeheader().
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, $encoding = 'UTF-8')
    {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($isKnownEncoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['iconv'] !== true) {
            // fallback without iconv
            $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

            return \mb_decode_mimeheader($input);
        }

        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
981
982
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so you can call it on a string that is already UTF-8 without messing it up.
     *
     * Besides real charsets, the pseudo-encodings "JSON", "BASE64" and "HTML-ENTITIES" are handled
     * in both directions ($toEncoding and $fromEncoding).
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the current encoding of $str is detected and
     *                                       a detected encoding replaces $fromEncoding; if nothing can be
     *                                       detected, the result of UTF8::to_utf8() is returned.<br>
     *                                       If false, $fromEncoding is used (detection only runs when
     *                                       $fromEncoding is empty).</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @return string the converted string; $str is returned unchanged when $str or $toEncoding is empty,
     *                when source and target encoding are the same / unknown, when a "BASE64" input is not
     *                valid base64, or when neither mbstring nor iconv could convert it
     */
    public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
    {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($toEncoding === 'JSON') {
            return self::json_encode($str);
        }
        if ($fromEncoding === 'JSON') {
            // NOTE(review): json_decode() may yield a non-string value - confirm the expected input.
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                // Strict decoding failed: the input is not valid base64. Keep the
                // best-effort contract of this method and hand back the input
                // instead of pushing "false" through the string-only code below.
                return $str;
            }

            $str = $decoded;
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        $fromEncodingDetected = false;
        if (
            $autodetectFromEncoding === true
            ||
            !$fromEncoding
        ) {
            $fromEncodingDetected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

        if ($fromEncodingDetected !== false) {
            $fromEncoding = $fromEncodingDetected;
        } elseif ($fromEncodingDetected === false && $autodetectFromEncoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$fromEncoding
            ||
            $fromEncoding === $toEncoding
        ) {
            return $str;
        }

        if (
            $toEncoding === 'UTF-8'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            (
                $fromEncoding === 'WINDOWS-1252'
                ||
                $fromEncoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $toEncoding !== 'UTF-8'
            &&
            $toEncoding !== 'ISO-8859-1'
            &&
            $toEncoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        // The support flags were already initialized above (and read just before),
        // so no second checkForSupport() guard is needed here.
        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $strEncoded = \mb_convert_encoding(
                $str,
                $toEncoding,
                $fromEncoding
            );

            if ($strEncoded) {
                return $strEncoded;
            }
        }

        // last resort: iconv, and if that fails as well, the unconverted input
        $return = \iconv($fromEncoding, $toEncoding, $str);
        if ($return !== false) {
            return $return;
        }

        return $str;
    }
1131
1132
    /**
     * Encodes a string as MIME header field via iconv_mime_encode().
     *
     * The field name handed to iconv_mime_encode() is always an empty string.
     *
     * @param string $str              <p>The header value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        if (!($fromCharset === 'UTF-8' || $fromCharset === 'CP850')) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (!($toCharset === 'UTF-8' || $toCharset === 'CP850')) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1172
1173
    /**
     * Create an extract from a sentence, so if the search-string was found, it tries to be centered in the output.
     *
     * Cut positions are looked up at the next ' ' or '.' around the wanted length; skipped text is
     * marked with $replacerForSkippedText and the extract is trimmed of surrounding punctuation.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the extract, $str itself if no cut position was found,
     *                or an empty string for an empty $str / a failed substr()
     */
    public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round(self::strlen($str, $encoding) / 2, 0);
        }

        // NOTE(review): the min() / max() calls below receive the raw strpos() / strrpos()
        // results; presumably those are "false" when the separator is missing, and
        // min(int, false) is false in PHP, so one missing separator means "no cut
        // position". The produced extracts depend on this - do not "fix" it casually.

        if (empty($search)) {
            // no search-string: cut from the beginning of the text
            $stringLength = self::strlen($str, $encoding);

            if ($length > 0) {
                $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = (int) \min(
                self::strpos($str, ' ', $end, $encoding),
                self::strpos($str, '.', $end, $encoding)
            );

            if ($pos) {
                $strSub = self::substr($str, 0, $pos, $encoding);
                if ($strSub === false) {
                    return '';
                }

                return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
            }

            return $str;
        }

        $wordPos = self::stripos($str, $search, 0, $encoding);
        // start of the window that puts the search-string roughly into the middle
        $halfSide = (int) ($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = self::substr($str, 0, $halfSide, $encoding);
            if ($halfText !== false) {
                $pos_start = (int) \max(
                    self::strrpos($halfText, ' ', 0, $encoding),
                    self::strrpos($halfText, '.', 0, $encoding)
                );
            }
        }

        if ($wordPos && $halfSide > 0) {
            $l = $pos_start + $length - 1;
            $realLength = self::strlen($str, $encoding);

            if ($l > $realLength) {
                $l = $realLength;
            }

            $pos_end = (int) \min(
                    self::strpos($str, ' ', $l, $encoding),
                    self::strpos($str, '.', $l, $encoding)
                ) - $pos_start;

            if (!$pos_end || $pos_end <= 0) {
                // No usable end position: take everything from $pos_start on. The length
                // is measured in $encoding like every other position in this method.
                $strSub = self::substr($str, $pos_start, $realLength, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
                } else {
                    $extract = '';
                }
            } else {
                $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            }
        } else {
            // search-string not found (or at the very beginning): cut from the start of the text
            $l = $length - 1;
            $trueLength = self::strlen($str, $encoding);

            if ($l > $trueLength) {
                $l = $trueLength;
            }

            $pos_end = \min(
                self::strpos($str, ' ', $l, $encoding),
                self::strpos($str, '.', $l, $encoding)
            );

            if ($pos_end) {
                $strSub = self::substr($str, 0, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1291
1292
    /**
     * Reads an entire file into a string and (optionally) converts it to UTF-8.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. It is passed through
     *                                        filter_var() with FILTER_SANITIZE_STRING first.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Handed to the native file_get_contents() to trigger
     *                                        the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If it is null and $timeout is set,
     *                                        a context with an "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null means 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that is
     *                                        detected as binary (and is neither UTF-16 nor UTF-32) is not
     *                                        converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm the supported versions.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);

        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        $data = \is_int($maxLength) === true
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // binary data stays untouched, unless it is UTF-16 or UTF-32
        $isPlainBinary = self::is_binary($data, true) === true
                         &&
                         self::is_utf16($data, false) === false
                         &&
                         self::is_utf32($data, false) === false;

        if ($isPlainBinary === false) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1385
1386
    /**
     * Tells whether the content of a file starts with a BOM (Byte Order Mark).
     *
     * The whole file is read with the native file_get_contents() and then
     * checked by UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1405
1406
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively (element by element / property
     * by property), strings are normalized, every other type is returned as it is.
     *
     * @param mixed  $var                <p>The value to normalize.</p>
     * @param int    $normalization_form <p>The form handed to \Normalizer (default: 4, NFC).</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which would
     *                                   otherwise start with a combining mark.</p>
     *
     * @return mixed the filtered value, same kind of value as $var
     */
    public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        if ($type === 'array') {
            foreach ($var as $key => $value) {
                /** @noinspection AlterInForeachInspection */
                $var[$key] = self::filter($value, $normalization_form, $leading_combining);
            }

            return $var;
        }

        if ($type === 'object') {
            foreach ($var as $property => $value) {
                $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
            }

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        /** @noinspection PhpUndefinedClassInspection */
        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // marker: already normalized, keeps the isset($n[0]) check below true
            $n = '-';
        } else {
            /** @noinspection PhpUndefinedClassInspection */
            $n = \Normalizer::normalize($var, $normalization_form);

            // no usable result from the Normalizer: fall back to a re-encoding
            $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
                                   &&
                                   isset($n[0], $leading_combining[0])
                                   &&
                                   \preg_match('/^\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1471
1472
    /**
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and
     * passes the result through UTF8::filter().
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              It is only handed to filter_input() if it was passed explicitly.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(int $type, string $variable_name, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // only forward $options when the caller really supplied it
        $rawValue = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($rawValue);
    }
1510
1511
    /**
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the result
     * through UTF8::filter().
     *
     * @see  http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" (the filter type), "flags" (any flags that
     *                          apply to the filter) and "options" (any options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
     *                          input array are filtered by this filter.
     *                          </p>
     *                          <p>
     *                          $definition and $add_empty are only handed to filter_input_array() if at
     *                          least $definition was passed explicitly.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        $rawValues = \func_num_args() < 2
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($rawValues);
    }
1556
1557
    /**
1558
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1559
     *
1560
     * Filters a variable with a specified filter
1561
     *
1562
     * @see  http://php.net/manual/en/function.filter-var.php
1563
     *
1564
     * @param mixed $variable <p>
1565
     *                        Value to filter.
1566
     *                        </p>
1567
     * @param int   $filter   [optional] <p>
1568
     *                        The ID of the filter to apply. The
1569
     *                        manual page lists the available filters.
1570
     *                        </p>
1571
     * @param mixed $options  [optional] <p>
1572
     *                        Associative array of options or bitwise disjunction of flags. If filter
1573
     *                        accepts options, flags can be provided in "flags" field of array. For
1574
     *                        the "callback" filter, callable type should be passed. The
1575
     *                        callback must accept one argument, the value to be filtered, and return
1576
     *                        the value after filtering/sanitizing it.
1577
     *                        </p>
1578
     *                        <p>
1579
     *                        <code>
1580
     *                        // for filters that accept options, use this format
1581
     *                        $options = array(
1582
     *                        'options' => array(
1583
     *                        'default' => 3, // value to return if the filter fails
1584
     *                        // other options here
1585
     *                        'min_range' => 0
1586
     *                        ),
1587
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1588
     *                        );
1589
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1590
     *                        // for filter that only accept flags, you can pass them directly
1591
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1592
     *                        // for filter that only accept flags, you can also pass as an array
1593
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1594
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1595
     *                        // callback validate filter
1596
     *                        function foo($value)
1597
     *                        {
1598
     *                        // Expected format: Surname, GivenNames
1599
     *                        if (strpos($value, ", ") === false) return false;
1600
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1601
     *                        $empty = (empty($surname) || empty($givennames));
1602
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1603
     *                        if ($empty || $notstrings) {
1604
     *                        return false;
1605
     *                        } else {
1606
     *                        return $value;
1607
     *                        }
1608
     *                        }
1609
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1610
     *                        </code>
1611
     *                        </p>
1612
     *
1613
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1614
     */
1615 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Forward $options only when the caller actually supplied a third
        // argument, so the native function never receives the null placeholder.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        // Post-process the native result with the class' own filter().
        return self::filter($filtered);
    }
1625
1626
    /**
1627
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1628
     *
1629
     * Gets multiple variables and optionally filters them
1630
     *
1631
     * @see  http://php.net/manual/en/function.filter-var-array.php
1632
     *
1633
     * @param array $data       <p>
1634
     *                          An array with string keys containing the data to filter.
1635
     *                          </p>
1636
     * @param mixed $definition [optional] <p>
1637
     *                          An array defining the arguments. A valid key is a string
1638
     *                          containing a variable name and a valid value is either a
1639
     *                          filter type, or an
1640
     *                          array optionally specifying the filter, flags and options.
1641
     *                          If the value is an array, valid keys are filter
1642
     *                          which specifies the filter type,
1643
     *                          flags which specifies any flags that apply to the
1644
     *                          filter, and options which specifies any options that
1645
     *                          apply to the filter. See the example below for a better understanding.
1646
     *                          </p>
1647
     *                          <p>
1648
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1649
     *                          input array are filtered by this filter.
1650
     *                          </p>
1651
     * @param bool  $add_empty  [optional] <p>
1652
     *                          Add missing keys as <b>NULL</b> to the return value.
1653
     *                          </p>
1654
     *
1655
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1656
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1657
     *               set
1658
     */
1659 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // When only $data was given, let the native function use its own
        // defaults instead of forwarding the null placeholder for $definition.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        // Post-process the native result with the class' own filter().
        return self::filter($filtered);
    }
1669
1670
    /**
     * Tells whether the "finfo" class can be used on this server.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1680
1681
    /**
     * Returns the leading $n characters of a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the start; values <= 0 yield an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the leading characters, or an empty string if nothing could be extracted
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0) {
            return '';
        }

        $head = self::substr($str, 0, $n, $encoding);

        // self::substr() may signal failure with false; report that as ''.
        return $head === false ? '' : $head;
    }
1703
1704
    /**
     * Checks that a string has no more unicode characters than the given limit.
     *
     * @param string $str      the string to measure
     * @param int    $box_size the maximum number of characters allowed
     *
     * @return bool true if the length is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $length = self::strlen($str);

        return $length <= $box_size;
    }
1716
1717
    /**
     * Applies the case-fold replacement tables to a string.
     *
     * @param string $str
     * @param bool   $useLower     <p>Maps to uppercase by default; pass true to map to lowercase instead.</p>
     * @param bool   $fullCaseFold <p>Additionally apply the full case-folding table, not only the common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        // Only a strict boolean true selects the lowercase direction.
        $toLower = ($useLower === true);

        $commonUpper = self::$COMMON_CASE_FOLD['upper'];
        $commonLower = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? (string) \str_replace($commonUpper, $commonLower, $str)
            : (string) \str_replace($commonLower, $commonUpper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // The full table is loaded on first use and kept in a static local.
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Entry 0 is replaced by entry 1 when lowering, and the other way round otherwise.
        if ($toLower) {
            return (string) \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return (string) \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
1758
1759
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * If the "utf8_fix" replacement table cannot be loaded, the input is returned unchanged.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Search / replace lists are derived from the table once and then reused.
        static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
        static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

        if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                // getData() returns false when the data file is missing; never
                // store that in the array-typed property.
                $fixTable = self::getData('utf8_fix');
                self::$BROKEN_UTF8_FIX = \is_array($fixTable) ? $fixTable : [];
            }

            // Guard again: the property may have been filled elsewhere with a non-array.
            $table = \is_array(self::$BROKEN_UTF8_FIX) ? self::$BROKEN_UTF8_FIX : [];

            $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($table);
            $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($table);
        }

        return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
    }
1792
1793
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * Arrays are processed element by element (recursively); keys are kept.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                          Will return the fixed input-"array" or
     *                          the fixed input-"string"
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // array_map() with a single array preserves the original keys.
            return \array_map(
                static function ($value) {
                    return self::fix_utf8($value);
                },
                $str
            );
        }

        $current = (string) $str;
        $previous = '';

        // Decode + re-encode until another pass no longer changes the string.
        while ($previous !== $current) {
            $previous = $current;
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        return $current;
    }
1823
1824
    /**
     * Get the text direction of a specific character.
     *
     * "IntlChar" is asked first when it is available; if it is missing or its
     * answer is neither a left-to-right nor a right-to-left code, the decision
     * falls back to a built-in table of right-to-left code points.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $direction = \IntlChar::charDirection($char);

            // direction codes from "IntlChar"-Class
            if (\in_array($direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $c = self::chr_to_decimal($char);

        // Everything outside this span is left-to-right.
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // Table entries: an int is one exact code point (strict comparison),
        // a two-element array is an inclusive [from, to] range.
        if ($c <= 0x85e) {
            $rtlTable = [
                0x5be,
                0x5c0,
                0x5c3,
                0x5c6,
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                0x608,
                0x60b,
                0x60d,
                0x61b,
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                0x710,
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                0x7b1,
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                0x7fa,
                [0x800, 0x815],
                0x81a,
                0x824,
                0x828,
                [0x830, 0x83e],
                [0x840, 0x858],
                0x85e,
            ];
        } elseif ($c === 0x200f) {
            return 'RTL';
        } elseif ($c >= 0xfb1d) {
            $rtlTable = [
                0xfb1d,
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                0xfb3e,
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                0x10808,
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                0x1083c,
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                0x1093f,
                0x10a00,
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // Between the two tables (and not U+200F): left-to-right.
            return 'LTR';
        }

        foreach ($rtlTable as $entry) {
            $isRtl = \is_array($entry)
                ? ($c >= $entry[0] && $c <= $entry[1])
                : ($c === $entry);

            if ($isRtl) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
1941
1942
    /**
     * get data from "/data/*.php"
     *
     * @param string $file <p>Base name of the data file, without directory and without the ".php" extension.</p>
     *
     * @return false|mixed whatever the data file returns; false if the file does not exist
     */
    private static function getData(string $file)
    {
        // The resolved path stays in $file: the required script runs in this
        // function's scope and therefore sees this function's local variables.
        $file = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($file)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        return require $file;
    }
1959
1960
    /**
     * Check for php-support.
     *
     * Runs the support detection first if it has not been done yet.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($key === null) {
            return self::$SUPPORT;
        }

        // Unknown keys (and keys whose stored value is null) yield null.
        return self::$SUPPORT[$key] ?? null;
    }
1986
1987
    /**
     * Guess the file type from the first two bytes of the given content.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * The two leading bytes are combined into one 16-bit number so that every
     * byte pair has its own key. (Joining the decimal values as text is
     * ambiguous: the bytes 7,173 and 71,73 would both produce "7173".)
     *
     * @param string $str      the (binary) content to inspect
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'; returned when the content is empty,
     *                         shorter than two bytes or not recognized
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        $str_info = self::substr_in_byte($str, 0, 2);
        if (self::strlen_in_byte($str_info) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $str_info);
        if ($bytes === false) {
            return $fallback;
        }

        // first byte in the high 8 bits, second byte in the low 8 bits
        $signature = ($bytes['chars1'] << 8) | $bytes['chars2'];

        // signature => [extension, mime type]
        $knownSignatures = [
            0x2550 => ['pdf', 'application/pdf'],          // "%P"
            0x4d5a => ['exe', 'application/octet-stream'], // "MZ"
            0x4d54 => ['midi', 'audio/x-midi'],            // "MT"
            0x504b => ['zip', 'application/zip'],          // "PK"
            0x5261 => ['rar', 'application/rar'],          // "Ra"
            0xffd8 => ['jpg', 'image/jpeg'],               // 0xFF 0xD8
            0x4749 => ['gif', 'image/gif'],                // "GI"
            0x424d => ['bmp', 'image/bmp'],                // "BM"
            0x8950 => ['png', 'image/png'],                // 0x89 "P"
        ];

        if (!isset($knownSignatures[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $knownSignatures[$signature][0],
            'mime' => $knownSignatures[$signature][1],
            'type' => 'binary',
        ];
    }
2085
2086
    /**
     * Builds a random string by picking characters from a pool.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the random string; empty if the pool is empty or $length is not positive
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        $poolSize = self::strlen($possibleChars, $encoding);
        if ($poolSize === 0) {
            return '';
        }

        $result = '';
        for ($picked = 0; $picked < $length; ++$picked) {
            try {
                $index = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                // random_int() failed with an exception: use mt_rand() instead.
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $poolSize - 1);
            }

            $result .= self::substr($possibleChars, $index, 1, $encoding);
        }

        return $result;
    }
2119
2120
    /**
     * Builds a unique identifier from mt_rand(), the session id, the remote /
     * server address (when present in $_SERVER) and optional extra entropy.
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string the md5-hash, or the raw uniqid()-based string if $md5 is false
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        $seed = \mt_rand()
            . \session_id()
            . ($_SERVER['REMOTE_ADDR'] ?? '')
            . ($_SERVER['SERVER_ADDR'] ?? '')
            . $entropyExtra;

        // The seed is used as the uniqid() prefix and, for the hashed form, appended again.
        $unique = \uniqid($seed, true);

        return $md5 ? \md5($unique . $seed) : $unique;
    }
2142
2143
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @see        UTF8::string_has_bom()
     *
     * @param string $str
     *
     * @return bool the result of "UTF8::string_has_bom()" for the same input
     *
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2158
2159
    /**
     * Tells whether the string contains at least one lower case char.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        // anything, followed by one char of the POSIX "lower" class
        $pattern = '.*[[:lower:]]';

        return self::str_matches_pattern($str, $pattern);
    }
2170
2171
    /**
     * Tells whether the string contains at least one upper case char.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        // anything, followed by one char of the POSIX "upper" class
        $pattern = '.*[[:upper:]]';

        return self::str_matches_pattern($str, $pattern);
    }
2182
2183
    /**
     * Converts a hexadecimal-value into an UTF-8 character.
     *
     * The value is turned into a number with hexdec() and then handed to
     * "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2194
2195
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted forms: 4 to 6 hexadecimal digits, optionally prefixed with
     * "\u" or "U+" (case-insensitive). Anything else, including input with
     * non-hexadecimal letters, is reported as a failure.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hex digits may form the number: letters beyond "f" must be
        // rejected here instead of being silently turned into 0 by intval().
        if (\preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match) === 1) {
            return \intval($match[1], 16);
        }

        return false;
    }
2219
2220
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @see UTF8::html_entity_decode()
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string the result of "UTF8::html_entity_decode()" for the same arguments
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2235
2236
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * With mbstring available the work is done by mb_encode_numericentity();
     * otherwise each character is encoded via "UTF8::single_chr_html_encode()".
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Start at 0x80 to leave the ASCII range untouched.
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            return \mb_encode_numericentity(
                $str,
                // A convmap entry has exactly four values: [start, end, offset, mask].
                // A stray fifth value makes PHP 8 reject the map.
                [$startCode, 0xfffff, 0, 0xfffff],
                $encoding
            );
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function ($chr) use ($keepAsciiChars, $encoding) {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::split($str)
            )
        );
    }
2289
2290
    /**
2291
     * UTF-8 version of html_entity_decode()
2292
     *
2293
     * The reason we are not using html_entity_decode() by itself is because
2294
     * while it is not technically correct to leave out the semicolon
2295
     * at the end of an entity most browsers will still interpret the entity
2296
     * correctly. html_entity_decode() does not convert entities without
2297
     * semicolons, so we are left with our own little solution here. Bummer.
2298
     *
2299
     * Convert all HTML entities to their applicable characters
2300
     *
2301
     * INFO: opposite to UTF8::html_encode()
2302
     *
2303
     * @see http://php.net/manual/en/function.html-entity-decode.php
2304
     *
2305
     * @param string $str      <p>
2306
     *                         The input string.
2307
     *                         </p>
2308
     * @param int    $flags    [optional] <p>
2309
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2310
     *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2311
     *                         <table>
2312
     *                         Available <i>flags</i> constants
2313
     *                         <tr valign="top">
2314
     *                         <td>Constant Name</td>
2315
     *                         <td>Description</td>
2316
     *                         </tr>
2317
     *                         <tr valign="top">
2318
     *                         <td><b>ENT_COMPAT</b></td>
2319
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2320
     *                         </tr>
2321
     *                         <tr valign="top">
2322
     *                         <td><b>ENT_QUOTES</b></td>
2323
     *                         <td>Will convert both double and single quotes.</td>
2324
     *                         </tr>
2325
     *                         <tr valign="top">
2326
     *                         <td><b>ENT_NOQUOTES</b></td>
2327
     *                         <td>Will leave both double and single quotes unconverted.</td>
2328
     *                         </tr>
2329
     *                         <tr valign="top">
2330
     *                         <td><b>ENT_HTML401</b></td>
2331
     *                         <td>
2332
     *                         Handle code as HTML 4.01.
2333
     *                         </td>
2334
     *                         </tr>
2335
     *                         <tr valign="top">
2336
     *                         <td><b>ENT_XML1</b></td>
2337
     *                         <td>
2338
     *                         Handle code as XML 1.
2339
     *                         </td>
2340
     *                         </tr>
2341
     *                         <tr valign="top">
2342
     *                         <td><b>ENT_XHTML</b></td>
2343
     *                         <td>
2344
     *                         Handle code as XHTML.
2345
     *                         </td>
2346
     *                         </tr>
2347
     *                         <tr valign="top">
2348
     *                         <td><b>ENT_HTML5</b></td>
2349
     *                         <td>
2350
     *                         Handle code as HTML 5.
2351
     *                         </td>
2352
     *                         </tr>
2353
     *                         </table>
2354
     *                         </p>
2355
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2356
     *
2357
     * @return string the decoded string
2358
     */
2359 40
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Decodes HTML entities in $str and keeps decoding until a pass no longer
        // changes the string, so nested encodings are unwrapped as well.

        if ($str === '') {
            return '';
        }

        // Strings shorter than four bytes are returned untouched (examples: "&;" or "&x;").
        if (!isset($str[3])) {
            return $str;
        }

        // Fast path: without "&", or without both "&#" and ";", the input is returned as-is.
        if (
            \strpos($str, '&') === false
            ||
            (
                \strpos($str, '&#') === false
                &&
                \strpos($str, ';') === false
            )
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // The support table is filled lazily, so make sure it exists *before* the
        // first read of self::$SUPPORT below (previously the guard ran after it).
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                // The convmap must contain a multiple of four integers
                // (start, end, offset, mask); the former fifth element was surplus
                // and is rejected by newer PHP versions.
                $str = \mb_decode_numericentity(
                    $str,
                    [0x80, 0xfffff, 0, 0xfffff],
                    $encoding
                );
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    static function ($matches) use ($encoding) {
                        // always fallback via symfony polyfill
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                        // quotes stay encoded here; the $flags-aware call below decides about them
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            // Append the missing ";" to unterminated numeric / hex entities so that
            // the native decoder recognises them.
            $terminated = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
            if ($terminated === null) {
                // PCRE failed: decode the unmodified string instead of passing null
                // to a string parameter (a TypeError under strict_types).
                $terminated = $str;
            }

            // decode numeric & UTF16 two byte entities
            $str = \html_entity_decode($terminated, $flags, $encoding);
        } while ($str_compare !== $str);

        return $str;
    }
2442
2443
    /**
2444
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2445
     *
2446
     * @param string $str
2447
     * @param string $encoding [optional] <p>Default: UTF-8</p>
2448
     *
2449
     * @return string
2450
     */
2451 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Always escape both quote styles and request substitution for invalid sequences.
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2459
2460
    /**
2461
     * Remove empty html-tag.
2462
     *
2463
     * e.g.: <tag></tag>
2464
     *
2465
     * @param string $str
2466
     *
2467
     * @return string
2468
     */
2469 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag, optional whitespace only, then a closing tag.
        $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        // preg_replace() yields null on a PCRE error; the cast turns that into ''.
        return (string) $stripped;
    }
2477
2478
    /**
2479
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2480
     *
2481
     * @see http://php.net/manual/en/function.htmlentities.php
2482
     *
2483
     * @param string $str           <p>
2484
     *                              The input string.
2485
     *                              </p>
2486
     * @param int    $flags         [optional] <p>
2487
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2488
     *                              invalid code unit sequences and the used document type. The default is
2489
     *                              ENT_COMPAT | ENT_HTML401.
2490
     *                              <table>
2491
     *                              Available <i>flags</i> constants
2492
     *                              <tr valign="top">
2493
     *                              <td>Constant Name</td>
2494
     *                              <td>Description</td>
2495
     *                              </tr>
2496
     *                              <tr valign="top">
2497
     *                              <td><b>ENT_COMPAT</b></td>
2498
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2499
     *                              </tr>
2500
     *                              <tr valign="top">
2501
     *                              <td><b>ENT_QUOTES</b></td>
2502
     *                              <td>Will convert both double and single quotes.</td>
2503
     *                              </tr>
2504
     *                              <tr valign="top">
2505
     *                              <td><b>ENT_NOQUOTES</b></td>
2506
     *                              <td>Will leave both double and single quotes unconverted.</td>
2507
     *                              </tr>
2508
     *                              <tr valign="top">
2509
     *                              <td><b>ENT_IGNORE</b></td>
2510
     *                              <td>
2511
     *                              Silently discard invalid code unit sequences instead of returning
2512
     *                              an empty string. Using this flag is discouraged as it
2513
     *                              may have security implications.
2514
     *                              </td>
2515
     *                              </tr>
2516
     *                              <tr valign="top">
2517
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2518
     *                              <td>
2519
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2520
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2521
     *                              </td>
2522
     *                              </tr>
2523
     *                              <tr valign="top">
2524
     *                              <td><b>ENT_DISALLOWED</b></td>
2525
     *                              <td>
2526
     *                              Replace invalid code points for the given document type with a
2527
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2528
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2529
     *                              instance, to ensure the well-formedness of XML documents with
2530
     *                              embedded external content.
2531
     *                              </td>
2532
     *                              </tr>
2533
     *                              <tr valign="top">
2534
     *                              <td><b>ENT_HTML401</b></td>
2535
     *                              <td>
2536
     *                              Handle code as HTML 4.01.
2537
     *                              </td>
2538
     *                              </tr>
2539
     *                              <tr valign="top">
2540
     *                              <td><b>ENT_XML1</b></td>
2541
     *                              <td>
2542
     *                              Handle code as XML 1.
2543
     *                              </td>
2544
     *                              </tr>
2545
     *                              <tr valign="top">
2546
     *                              <td><b>ENT_XHTML</b></td>
2547
     *                              <td>
2548
     *                              Handle code as XHTML.
2549
     *                              </td>
2550
     *                              </tr>
2551
     *                              <tr valign="top">
2552
     *                              <td><b>ENT_HTML5</b></td>
2553
     *                              <td>
2554
     *                              Handle code as HTML 5.
2555
     *                              </td>
2556
     *                              </tr>
2557
     *                              </table>
2558
     *                              </p>
2559
     * @param string $encoding      [optional] <p>
2560
     *                              Like <b>htmlspecialchars</b>,
2561
     *                              <b>htmlentities</b> takes an optional third argument
2562
     *                              <i>encoding</i> which defines encoding used in
2563
     *                              conversion.
2564
     *                              Although this argument is technically optional, you are highly
2565
     *                              encouraged to specify the correct value for your code.
2566
     *                              </p>
2567
     * @param bool   $double_encode [optional] <p>
2568
     *                              When <i>double_encode</i> is turned off PHP will not
2569
     *                              encode existing html entities. The default is to convert everything.
2570
     *                              </p>
2571
     *
2572
     * @return string the encoded string.
2573
     * </p>
2574
     * <p>
2575
     * If the input <i>string</i> contains an invalid code unit
2576
     * sequence within the given <i>encoding</i> an empty string
2577
     * will be returned, unless either the <b>ENT_IGNORE</b> or
2578
     * <b>ENT_SUBSTITUTE</b> flags are set
2579
     */
2580 9
    public static function htmlentities(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        $needsNormalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // The native htmlentities() leaves backslashes alone, so they are turned
        // into their numeric entity here.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
2600
2601
    /**
2602
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2603
     *
2604
     * INFO: Take a look at "UTF8::htmlentities()"
2605
     *
2606
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2607
     *
2608
     * @param string $str           <p>
2609
     *                              The string being converted.
2610
     *                              </p>
2611
     * @param int    $flags         [optional] <p>
2612
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2613
     *                              invalid code unit sequences and the used document type. The default is
2614
     *                              ENT_COMPAT | ENT_HTML401.
2615
     *                              <table>
2616
     *                              Available <i>flags</i> constants
2617
     *                              <tr valign="top">
2618
     *                              <td>Constant Name</td>
2619
     *                              <td>Description</td>
2620
     *                              </tr>
2621
     *                              <tr valign="top">
2622
     *                              <td><b>ENT_COMPAT</b></td>
2623
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2624
     *                              </tr>
2625
     *                              <tr valign="top">
2626
     *                              <td><b>ENT_QUOTES</b></td>
2627
     *                              <td>Will convert both double and single quotes.</td>
2628
     *                              </tr>
2629
     *                              <tr valign="top">
2630
     *                              <td><b>ENT_NOQUOTES</b></td>
2631
     *                              <td>Will leave both double and single quotes unconverted.</td>
2632
     *                              </tr>
2633
     *                              <tr valign="top">
2634
     *                              <td><b>ENT_IGNORE</b></td>
2635
     *                              <td>
2636
     *                              Silently discard invalid code unit sequences instead of returning
2637
     *                              an empty string. Using this flag is discouraged as it
2638
     *                              may have security implications.
2639
     *                              </td>
2640
     *                              </tr>
2641
     *                              <tr valign="top">
2642
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2643
     *                              <td>
2644
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2645
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2646
     *                              </td>
2647
     *                              </tr>
2648
     *                              <tr valign="top">
2649
     *                              <td><b>ENT_DISALLOWED</b></td>
2650
     *                              <td>
2651
     *                              Replace invalid code points for the given document type with a
2652
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2653
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2654
     *                              instance, to ensure the well-formedness of XML documents with
2655
     *                              embedded external content.
2656
     *                              </td>
2657
     *                              </tr>
2658
     *                              <tr valign="top">
2659
     *                              <td><b>ENT_HTML401</b></td>
2660
     *                              <td>
2661
     *                              Handle code as HTML 4.01.
2662
     *                              </td>
2663
     *                              </tr>
2664
     *                              <tr valign="top">
2665
     *                              <td><b>ENT_XML1</b></td>
2666
     *                              <td>
2667
     *                              Handle code as XML 1.
2668
     *                              </td>
2669
     *                              </tr>
2670
     *                              <tr valign="top">
2671
     *                              <td><b>ENT_XHTML</b></td>
2672
     *                              <td>
2673
     *                              Handle code as XHTML.
2674
     *                              </td>
2675
     *                              </tr>
2676
     *                              <tr valign="top">
2677
     *                              <td><b>ENT_HTML5</b></td>
2678
     *                              <td>
2679
     *                              Handle code as HTML 5.
2680
     *                              </td>
2681
     *                              </tr>
2682
     *                              </table>
2683
     *                              </p>
2684
     * @param string $encoding      [optional] <p>
2685
     *                              Defines encoding used in conversion.
2686
     *                              </p>
2687
     *                              <p>
2688
     *                              For the purposes of this function, the encodings
2689
     *                              ISO-8859-1, ISO-8859-15,
2690
     *                              UTF-8, cp866,
2691
     *                              cp1251, cp1252, and
2692
     *                              KOI8-R are effectively equivalent, provided the
2693
     *                              <i>string</i> itself is valid for the encoding, as
2694
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2695
     *                              the same positions in all of these encodings.
2696
     *                              </p>
2697
     * @param bool   $double_encode [optional] <p>
2698
     *                              When <i>double_encode</i> is turned off PHP will not
2699
     *                              encode existing html entities, the default is to convert everything.
2700
     *                              </p>
2701
     *
2702
     * @return string the converted string.
2703
     * </p>
2704
     * <p>
2705
     * If the input <i>string</i> contains an invalid code unit
2706
     * sequence within the given <i>encoding</i> an empty string
2707
     * will be returned, unless either the <b>ENT_IGNORE</b> or
2708
     * <b>ENT_SUBSTITUTE</b> flags are set
2709
     */
2710 8
    public static function htmlspecialchars(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        // 'UTF-8' and 'CP850' are passed through as given; every other name is normalized first.
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $escaped = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $escaped;
    }
2718
2719
    /**
2720
     * Checks whether iconv is available on the server.
2721
     *
2722
     * @return bool
2723
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2724
     */
2725
    public static function iconv_loaded(): bool
    {
        // extension_loaded() already returns a bool, so no ternary is needed.
        $hasIconv = \extension_loaded('iconv');

        return $hasIconv;
    }
2729
2730
    /**
2731
     * alias for "UTF8::decimal_to_chr()"
2732
     *
2733
     * @see UTF8::decimal_to_chr()
2734
     *
2735
     * @param mixed $int
2736
     *
2737
     * @return string
2738
     */
2739 4
    public static function int_to_chr($int): string
    {
        // Pure alias: the conversion itself lives in decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2743
2744
    /**
2745
     * Converts Integer to hexadecimal U+xxxx code point representation.
2746
     *
2747
     * INFO: opposite to UTF8::hex_to_int()
2748
     *
2749
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2750
     * @param string $pfix [optional]
2751
     *
2752
     * @return string the prefix followed by the hexadecimal code point, left-padded with zeros to at least four digits
2753
     */
2754 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Left-pad with zeros to at least four hex digits; longer values stay as they are.
        $paddedHex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $paddedHex;
    }
2762
2763
    /**
2764
     * Checks whether intl-char is available on the server.
2765
     *
2766
     * @return bool
2767
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2768
     */
2769
    public static function intlChar_loaded(): bool
    {
        // Availability is detected through the presence of the IntlChar class.
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2773
2774
    /**
2775
     * Checks whether intl is available on the server.
2776
     *
2777
     * @return bool
2778
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2779
     */
2780 5
    public static function intl_loaded(): bool
    {
        // Availability is detected through the loaded "intl" extension.
        $hasIntl = \extension_loaded('intl');

        return $hasIntl;
    }
2784
2785
    /**
2786
     * alias for "UTF8::is_ascii()"
2787
     *
2788
     * @see        UTF8::is_ascii()
2789
     *
2790
     * @param string $str
2791
     *
2792
     * @return bool
2793
     *
2794
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2795
     */
2796 2
    public static function isAscii(string $str): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_ascii().
        $result = self::is_ascii($str);

        return $result;
    }
2800
2801
    /**
2802
     * alias for "UTF8::is_base64()"
2803
     *
2804
     * @see        UTF8::is_base64()
2805
     *
2806
     * @param string $str
2807
     *
2808
     * @return bool
2809
     *
2810
     * @deprecated <p>use "UTF8::is_base64()"</p>
2811
     */
2812 2
    public static function isBase64($str): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_base64().
        $result = self::is_base64($str);

        return $result;
    }
2816
2817
    /**
2818
     * alias for "UTF8::is_binary()"
2819
     *
2820
     * @see        UTF8::is_binary()
2821
     *
2822
     * @param mixed $str
2823
     * @param bool  $strict
2824
     *
2825
     * @return bool
2826
     *
2827
     * @deprecated <p>use "UTF8::is_binary()"</p>
2828
     */
2829 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated camelCase alias; forwards both arguments unchanged to is_binary().
        $result = self::is_binary($str, $strict);

        return $result;
    }
2833
2834
    /**
2835
     * alias for "UTF8::is_bom()"
2836
     *
2837
     * @see        UTF8::is_bom()
2838
     *
2839
     * @param string $utf8_chr
2840
     *
2841
     * @return bool
2842
     *
2843
     * @deprecated <p>use "UTF8::is_bom()"</p>
2844
     */
2845 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_bom().
        $result = self::is_bom($utf8_chr);

        return $result;
    }
2849
2850
    /**
2851
     * alias for "UTF8::is_html()"
2852
     *
2853
     * @see        UTF8::is_html()
2854
     *
2855
     * @param string $str
2856
     *
2857
     * @return bool
2858
     *
2859
     * @deprecated <p>use "UTF8::is_html()"</p>
2860
     */
2861 2
    public static function isHtml(string $str): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_html().
        $result = self::is_html($str);

        return $result;
    }
2865
2866
    /**
2867
     * alias for "UTF8::is_json()"
2868
     *
2869
     * @see        UTF8::is_json()
2870
     *
2871
     * @param string $str
2872
     *
2873
     * @return bool
2874
     *
2875
     * @deprecated <p>use "UTF8::is_json()"</p>
2876
     */
2877
    public static function isJson(string $str): bool
    {
        // Deprecated camelCase alias; forwards unchanged to is_json().
        $result = self::is_json($str);

        return $result;
    }
2881
2882
    /**
2883
     * alias for "UTF8::is_utf16()"
2884
     *
2885
     * @see        UTF8::is_utf16()
2886
     *
2887
     * @param mixed $str
2888
     *
2889
     * @return false|int
2890
     *                    <strong>false</strong> if it is not UTF-16,<br>
2891
     *                    <strong>1</strong> for UTF-16LE,<br>
2892
     *                    <strong>2</strong> for UTF-16BE
2893
     *
2894
     * @deprecated <p>use "UTF8::is_utf16()"</p>
2895
     */
2896 2
    public static function isUtf16($str)
    {
        // Deprecated camelCase alias; hands back whatever is_utf16() reports.
        $result = self::is_utf16($str);

        return $result;
    }
2900
2901
    /**
2902
     * alias for "UTF8::is_utf32()"
2903
     *
2904
     * @see        UTF8::is_utf32()
2905
     *
2906
     * @param mixed $str
2907
     *
2908
     * @return false|int
2909
     *                   <strong>false</strong> if it is not UTF-32,
2910
     *                   <strong>1</strong> for UTF-32LE,
2911
     *                   <strong>2</strong> for UTF-32BE
2912
     *
2913
     * @deprecated <p>use "UTF8::is_utf32()"</p>
2914
     */
2915 2
    public static function isUtf32($str)
    {
        // Deprecated camelCase alias; hands back whatever is_utf32() reports.
        $result = self::is_utf32($str);

        return $result;
    }
2919
2920
    /**
2921
     * alias for "UTF8::is_utf8()"
2922
     *
2923
     * @see        UTF8::is_utf8()
2924
     *
2925
     * @param string $str
2926
     * @param bool   $strict
2927
     *
2928
     * @return bool
2929
     *
2930
     * @deprecated <p>use "UTF8::is_utf8()"</p>
2931
     */
2932 17
    public static function isUtf8($str, $strict = false): bool
    {
        // Deprecated camelCase alias; forwards both arguments unchanged to is_utf8().
        $result = self::is_utf8($str, $strict);

        return $result;
    }
2936
2937
    /**
2938
     * Returns true if the string contains only alphabetic chars, false otherwise.
2939
     *
2940
     * @param string $str
2941
     *
2942
     * @return bool
2943
     *               Whether or not $str contains only alphabetic chars
2944
     */
2945 10
    public static function is_alpha(string $str): bool
    {
        // Whole-string match against the POSIX "alpha" class; "*" lets '' match too.
        $alphaOnly = '^[[:alpha:]]*$';

        return self::str_matches_pattern($str, $alphaOnly);
    }
2949
2950
    /**
2951
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2952
     *
2953
     * @param string $str
2954
     *
2955
     * @return bool
2956
     *               Whether or not $str contains only alphanumeric chars
2957
     */
2958 13
    public static function is_alphanumeric(string $str): bool
    {
        // Whole-string match against the POSIX "alnum" class; "*" lets '' match too.
        $alnumOnly = '^[[:alnum:]]*$';

        return self::str_matches_pattern($str, $alnumOnly);
    }
2962
2963
    /**
2964
     * Checks if a string is 7 bit ASCII.
2965
     *
2966
     * @param string $str <p>The string to check.</p>
2967
     *
2968
     * @return bool
2969
     *              <strong>true</strong> if it is ASCII<br>
2970
     *              <strong>false</strong> otherwise
2971
     */
2972 201
    public static function is_ascii(string $str): bool
    {
        // The empty string counts as ASCII. Otherwise the string passes when no
        // byte outside the allowed set is found.
        //
        // NOTE(review): the allowed set lists \x10 and \x13 (hex) next to \x0A and
        // \x0D - possibly meant as decimal 10 / 13; confirm before changing.
        return $str === ''
            || \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str) !== 1;
    }
2980
2981
    /**
2982
     * Returns true if the string is base64 encoded, false otherwise.
2983
     *
2984
     * @param string $str <p>The input string.</p>
2985
     *
2986
     * @return bool whether or not $str is base64 encoded
2987
     */
2988 9
    public static function is_base64($str): bool
    {
        // Only non-empty strings can be base64 payloads; any other input is rejected.
        if (!\is_string($str) || $str === '') {
            return false;
        }

        // Strict mode: base64_decode() returns false when the input contains
        // characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);

        // Compare against false / '' explicitly. A plain truthiness test would
        // wrongly reject valid input whose decoded payload is "0" (e.g. "MA==").
        if ($decoded === false || $decoded === '') {
            return false;
        }

        // Round trip: only canonically encoded input re-encodes to itself.
        return \base64_encode($decoded) === $str;
    }
3002
3003
    /**
3004
     * Check if the input is binary (heuristic - this looks like a hack).
3005
     *
3006
     * @param mixed $input
3007
     * @param bool  $strict
3008
     *
3009
     * @return bool
3010
     */
3011 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // A string made up only of "0" and "1" characters is reported as binary.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        // Ask the file-type helper what it makes of the data.
        $fileType = self::get_file_type($data);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // A share of NUL bytes above 0.256 is reported as binary.
        $byteLength = self::strlen_in_byte($data);
        if ($byteLength) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            $nullBytes = self::substr_count_in_byte($data, "\x0", 0, $byteLength);
            if (($nullBytes / $byteLength) > 0.256) {
                return true;
            }
        }

        // The remaining check only runs in strict mode.
        if ($strict !== true) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $detectedEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $detectedEncoding === 'binary';
    }
3058
3059
    /**
3060
     * Check if the file is binary.
3061
     *
3062
     * @param string $file
3063
     *
3064
     * @return bool
3065
     */
3066 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // A file that cannot be opened is reported as "not binary".
        if (!\is_resource($handle)) {
            return false;
        }

        // Only the first 512 bytes are inspected.
        $head = \fread($handle, 512);
        \fclose($handle);

        return $head === '' ? false : self::is_binary($head, true);
    }
3083
3084
    /**
3085
     * Returns true if the string contains only whitespace chars, false otherwise.
3086
     *
3087
     * @param string $str
3088
     *
3089
     * @return bool
3090
     *               Whether or not $str contains only whitespace characters
3091
     */
3092 15
    public static function is_blank(string $str): bool
    {
        // Whole-string match against the POSIX "space" class; "*" lets '' match too.
        $whitespaceOnly = '^[[:space:]]*$';

        return self::str_matches_pattern($str, $whitespaceOnly);
    }
3096
3097
    /**
3098
     * Checks if the given string is equal to any "Byte Order Mark".
3099
     *
3100
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3101
     *
3102
     * @param string $str <p>The input string.</p>
3103
     *
3104
     * @return bool
3105
     *              <strong>true</strong> if $str is exactly a Byte Order Mark, <strong>false</strong> otherwise
3106
     */
3107 2
    public static function is_bom($str): bool
    {
        // The known BOM sequences are the keys of self::$BOM. The strict lookup
        // keeps the type-sensitive comparison of the key-by-key loop it replaces.
        $knownBoms = \array_keys(self::$BOM);

        return \in_array($str, $knownBoms, true);
    }
3117
3118
    /**
3119
     * Determine whether the string is considered to be empty.
3120
     *
3121
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3122
     * empty() does not generate a warning if the variable does not exist.
3123
     *
3124
     * @param mixed $str
3125
     *
3126
     * @return bool whether or not $str is empty()
3127
     */
3128
    public static function is_empty($str): bool
    {
        // For a parameter that is always defined, empty() is the same as a negated boolean cast.
        return !$str;
    }
3132
3133
    /**
     * Check whether a string consists solely of hexadecimal characters.
     *
     * The decision is delegated to UTF8::str_matches_pattern() using the POSIX
     * "[[:xdigit:]]" class, anchored at both ends of the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str matches the hexadecimal-only pattern,
     *              <strong>false</strong> otherwise
     */
    public static function is_hexadecimal(string $str): bool
    {
        $hexOnlyPattern = '^[[:xdigit:]]*$';

        return self::str_matches_pattern($str, $hexOnlyPattern);
    }
3145
3146
    /**
     * Check whether the string contains at least one HTML tag, e.g. "<b>" or "<br />".
     *
     * The test is a single regular-expression search for an opening, closing or
     * self-closing tag (optionally with attributes); it does not validate the markup.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool <strong>true</strong> if a tag was found, <strong>false</strong> otherwise
     *              (always <strong>false</strong> for an empty string)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

        // preg_match() yields 1 only when the pattern matched
        return \preg_match($tagPattern, $str) === 1;
    }
3166
3167
    /**
     * Try to check whether "$str" is a JSON string that decodes to an object or an array.
     *
     * JSON scalars (e.g. "1" or "true") are therefore reported as <strong>false</strong>.
     *
     * @param string $str <p>The input string.</p>
     *
     * @throws \RuntimeException if the json extension is reported as not available
     *
     * @return bool
     */
    public static function is_json(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // only structured values count as JSON here
        if (\is_object($decoded) === false && \is_array($decoded) === false) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3199
3200
    /**
     * Check whether a string consists solely of lower case characters.
     *
     * The decision is delegated to UTF8::str_matches_pattern() using the POSIX
     * "[[:lower:]]" class, anchored at both ends of the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str matches the lower-case-only pattern,
     *              <strong>false</strong> otherwise
     */
    public static function is_lowercase(string $str): bool
    {
        $lowerOnlyPattern = '^[[:lower:]]*$';

        return self::str_matches_pattern($str, $lowerOnlyPattern);
    }
3213
3214
    /**
     * Check whether the string is the output of PHP's serialize().
     *
     * The string is probed with unserialize(); no class is instantiated while doing so
     * ("allowed_classes" is disabled), so serialized objects are detected without
     * running any of their magic methods.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool <strong>true</strong> if $str could be unserialized, <strong>false</strong> otherwise
     *              (always <strong>false</strong> for an empty string)
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // serialized "false": unserialize() would return false, which is
        // indistinguishable from its failure value
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: the input may be untrusted; never let the probe instantiate
        // arbitrary classes (objects are decoded as __PHP_Incomplete_Class instead).
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3233
3234
    /**
     * Check whether a string consists solely of upper case characters.
     *
     * The decision is delegated to UTF8::str_matches_pattern() using the POSIX
     * "[[:upper:]]" class, anchored at both ends of the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str matches the upper-case-only pattern,
     *              <strong>false</strong> otherwise
     */
    public static function is_uppercase(string $str): bool
    {
        $upperOnlyPattern = '^[[:upper:]]*$';

        return self::str_matches_pattern($str, $upperOnlyPattern);
    }
3247
3248
    /**
     * Check if the string is UTF-16.
     *
     * The BOM-stripped input is converted from UTF-16LE and from UTF-16BE to UTF-8 and back.
     * For each byte order whose round trip is stable a score is computed; the byte order
     * with the higher score wins, a tie means "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If true, the string must first pass UTF8::is_binary(),
     *                                     otherwise <strong>false</strong> is returned right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        // Same guard the other methods of this class use before reading self::$SUPPORT;
        // without it the "mbstring" key may not be populated yet.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // score per byte order, evaluated in the order LE, BE
        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (\array_keys($scores) as $encoding) {
            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$asUtf8) {
                continue;
            }

            // only a conversion that survives a round trip is considered
            $backToUtf16 = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
            $roundTrip = \mb_convert_encoding($backToUtf16, 'UTF-8', $encoding);
            if ($roundTrip !== $asUtf8) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // NOTE(review): this looks up the keys returned for the converted text among the
            // values returned for the original text - confirm against UTF8::count_chars()
            // that this is the intended comparison. Behaviour is kept as-is.
            foreach (self::count_chars($roundTrip, true) as $roundTripChar => $unused) {
                if (\in_array($roundTripChar, $strChars, true) === true) {
                    $scores[$encoding]++;
                }
            }
        }

        if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
            return false;
        }

        return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
    }
3323
3324
    /**
     * Check if the string is UTF-32.
     *
     * The BOM-stripped input is converted from UTF-32LE and from UTF-32BE to UTF-8 and back.
     * For each byte order whose round trip is stable a score is computed; the byte order
     * with the higher score wins, a tie means "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If true, the string must first pass UTF8::is_binary(),
     *                                     otherwise <strong>false</strong> is returned right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        // Same guard the other methods of this class use before reading self::$SUPPORT;
        // without it the "mbstring" key may not be populated yet.
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // score per byte order, evaluated in the order LE, BE
        $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach (\array_keys($scores) as $encoding) {
            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$asUtf8) {
                continue;
            }

            // only a conversion that survives a round trip is considered
            $backToUtf32 = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
            $roundTrip = \mb_convert_encoding($backToUtf32, 'UTF-8', $encoding);
            if ($roundTrip !== $asUtf8) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // NOTE(review): this looks up the keys returned for the converted text among the
            // values returned for the original text - confirm against UTF8::count_chars()
            // that this is the intended comparison. Behaviour is kept as-is.
            foreach (self::count_chars($roundTrip, true) as $roundTripChar => $unused) {
                if (\in_array($roundTripChar, $strChars, true) === true) {
                    $scores[$encoding]++;
                }
            }
        }

        if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
            return false;
        }

        return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
    }
3399
3400
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * Arrays are checked element by element (recursively); every element has to be valid.
     * Any other value is cast to string once before it is inspected byte by byte.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool <strong>true</strong> if the input is valid UTF-8 (an empty string is valid),
     *              <strong>false</strong> otherwise
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Cast once, so that the length calculation and the byte-wise access below
        // operate on the very same string (also for ints, floats or stringable objects).
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut runs when pcre_utf8_support() does NOT return true,
        // although the shortcut itself relies on the /u modifier - confirm that the
        // condition is intended. Behaviour is kept as-is.
        if (self::pcre_utf8_support() !== true) {

            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // byte => ordinal lookup table, loaded on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = self::strlen_in_byte($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; $i++) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } else {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if ((0xC0 & $in) === 0x80) {
                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;
                    // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                    // Unicode code point to be output.
                    if (--$mState === 0) {
                        // Check for illegal sequences and code points.
                        //
                        // From Unicode 3.1, non-shortest form is illegal
                        if (
                            ($mBytes === 2 && $mUcs4 < 0x0080)
                            ||
                            ($mBytes === 3 && $mUcs4 < 0x0800)
                            ||
                            ($mBytes === 4 && $mUcs4 < 0x10000)
                            ||
                            ($mBytes > 4)
                            ||
                            // From Unicode 3.2, surrogate characters are illegal.
                            (($mUcs4 & 0xFFFFF800) === 0xD800)
                            ||
                            // Code points outside the Unicode range are illegal.
                            ($mUcs4 > 0x10FFFF)
                        ) {
                            return false;
                        }
                        // initialize UTF8 cache
                        $mState = 0;
                        $mUcs4 = 0;
                        $mBytes = 1;
                    }
                } else {
                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
                    // Incomplete multi-octet sequence.
                    return false;
                }
            }
        }

        return true;
    }
3559
3560
    /**
     * Decodes a JSON string.
     *
     * The input is passed through UTF8::filter() first and is then handed to
     * PHP's native json_decode() together with the remaining arguments.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, forwarded unchanged to json_decode().
     *                        </p>
     *
     * @throws \RuntimeException if the json extension is reported as not available
     *
     * @return mixed
     *                The value encoded in <i>json</i> in appropriate PHP type, as returned by json_decode();
     *                <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *                is deeper than the recursion limit.
     */
    public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
    {
        $filteredJson = self::filter($json);

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $jsonSupported = self::$SUPPORT['json'];
        if ($jsonSupported === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filteredJson, $assoc, $depth, $options);
    }
3610
3611
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through UTF8::filter() first and is then handed to
     * PHP's native json_encode() together with the remaining arguments.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded.</p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged to json_encode().
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if the json extension is reported as not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        $jsonSupported = self::$SUPPORT['json'];
        if ($jsonSupported === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filteredValue, $options, $depth);
    }
3664
3665
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is detected by the existence of the json_decode() function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3675
3676
    /**
     * Makes string's first char lowercase.
     *
     * The string is split into its first character and the remainder; only the
     * first character is passed through UTF8::strtolower().
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // everything after the first character stays untouched
        $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);
        $remainder = $remainder === false ? '' : $remainder;

        $firstChar = (string) self::substr($str, 0, 1, $encoding, $cleanUtf8);
        $loweredFirstChar = self::strtolower($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $loweredFirstChar . $remainder;
    }
3704
3705
    /**
     * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases.</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length</p>
     *
     * @return string the result of UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3727
3728
    /**
     * Lowercase the first character of all words in the string.
     *
     * The string is split via UTF8::str_to_words(); every non-empty chunk that is not listed
     * in $exceptions is passed through UTF8::lcfirst() and the chunks are joined again.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words (compared strictly).</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // compare against '' explicitly: the string "0" is falsy in PHP but is real content
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;
        $result = '';

        foreach ($words as $word) {
            // skip empty chunks only - a chunk consisting of "0" must be kept
            if ((string) $word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }

            $result .= $word;
        }

        return $result;
    }
3786
3787
    /**
     * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases.</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length</p>
     *
     * @return string the result of UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3809
3810
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * Without $chars (or with an empty value) the Unicode categories "Z" (separators)
     * and "C" (control and other invisible characters) are stripped. Otherwise the given
     * characters are quoted and used as a character class, so e.g. "0" strips leading zeros.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars <p>Optional characters to be stripped</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // The string "0" is falsy in PHP but is a perfectly valid character to strip,
        // so it must not fall back to the default character class.
        $useDefaultChars = $chars === \INF || ($chars !== '0' && !$chars);

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultChars) {
            $pattern = "^[\pZ\pC]+";
        } else {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3834
3835
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array is joined into one string first; the code points are obtained via
     * UTF8::codepoints() and the highest one is converted back with UTF8::chr().
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $text = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($text, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\max($codepoints));
    }
3857
3858
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character sizes come from UTF8::chr_size_list().
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 if the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        if (\count($sizes) === 0) {
            return 0;
        }

        return (int) \max($sizes);
    }
3875
3876
    /**
     * Checks whether mbstring is available on the server.
     *
     * Side effect: if the extension is loaded, the mbstring internal encoding
     * is switched to "UTF-8".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        if (\extension_loaded('mbstring') === false) {
            return false;
        }

        \mb_internal_encoding('UTF-8');

        return true;
    }
3892
3893
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * That is the case if the MB_OVERLOAD_STRING constant exists and its bit is set
     * in the INI directive 'mbstring.func_overload' (deprecated since PHP 7.2).
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        if (\defined('MB_OVERLOAD_STRING') === false) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = @\ini_get('mbstring.func_overload');

        return (bool) ($overloadFlags & \MB_OVERLOAD_STRING);
    }
3910
3911
    /**
     * Finds the UTF-8 character with the lowest code point in the given data.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code points were found
     */
    public static function min($arg)
    {
        // A list of strings is flattened into one string before it is inspected.
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        // No code points means there is nothing to compare.
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
3933
3934
    /**
     * Deprecated camelCase alias that forwards to "UTF8::normalize_encoding()".
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
3950
3951
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty / '0' / '1' input yields $fallback; a few well-known names are
     * answered directly; then the per-request cache and the list of known encodings are consulted;
     * finally the upper-cased name (stripped of punctuation) is looked up in an alias table.
     * A name without an alias is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        if (
            !$encoding
            ||
            $encoding === '1' // only a fallback, for non "strict_types" usage ...
            ||
            $encoding === '0' // only a fallback, for non "strict_types" usage ...
        ) {
            return $fallback;
        }

        if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
            return 'UTF-8';
        }

        if ($encoding === '8BIT' || $encoding === 'BINARY') {
            return 'CP850';
        }

        if ($encoding === 'HTML' || $encoding === 'HTML-ENTITIES') {
            return 'HTML-ENTITIES';
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // getData() can return false; never let a non-array value reach in_array().
        if (!\is_array(self::$ENCODINGS)) {
            $loadedEncodings = self::getData('encodings');
            self::$ENCODINGS = \is_array($loadedEncodings) ? $loadedEncodings : [];
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encodingOrig = $encoding;
        $encoding = \strtoupper($encoding);
        // preg_replace() returns null on error; the cast keeps the array offset below a string.
        $encodingUpperHelper = (string) \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        if (isset($equivalences[$encodingUpperHelper])) {
            $encoding = $equivalences[$encodingUpperHelper];
        }

        // Cache under the caller's original spelling so the same input short-circuits next time.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

        return $encoding;
    }
4092
4093
    /**
     * Converts Windows ("\r\n") and old Mac ("\r") line endings to the unix-like "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // Collapse the two-byte sequence first so a CRLF pair becomes exactly one "\n".
        $withoutCrLf = \str_replace("\r\n", "\n", $str);

        return (string) \str_replace("\r", "\n", $withoutCrLf);
    }
4104
4105
    /**
     * Normalize some MS Word special characters.
     *
     * The replacement table is loaded via getData('utf8_msword') on first use and its keys/values
     * are kept in static caches for subsequent calls.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        static $UTF8_MSWORD_KEYS_CACHE = null;
        static $UTF8_MSWORD_VALUES_CACHE = null;

        if ($UTF8_MSWORD_KEYS_CACHE === null) {
            // getData() can return false; fall back to an empty table so that
            // array_keys() / array_values() never receive a non-array value.
            if (!\is_array(self::$UTF8_MSWORD)) {
                $loadedTable = self::getData('utf8_msword');
                self::$UTF8_MSWORD = \is_array($loadedTable) ? $loadedTable : [];
            }

            $UTF8_MSWORD_KEYS_CACHE = \array_keys(self::$UTF8_MSWORD);
            $UTF8_MSWORD_VALUES_CACHE = \array_values(self::$UTF8_MSWORD);
        }

        return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
    }
4132
4133
    /**
     * Replaces every character listed in the whitespace table with a plain space and,
     * unless told otherwise, removes the characters listed in the bidi-control table.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        if ($str === '') {
            return '';
        }

        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        // One cached search list per variant (0: replace NBSP too, 1: keep NBSP).
        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $whitespaceTable = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($whitespaceTable['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // Bidi controls are removed outright, not replaced by a space.
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4174
4175
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-request cache, the pre-computed "ord" data table, IntlChar (when supported),
     * and finally a manual decode of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 when no byte could be read from the input
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The cache key is built from the untouched input, before any re-encoding below.
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // Table hits are returned directly and are not copied into $CHAR_CACHE.
        if (isset(self::$ORD[$chr])) {
            return self::$ORD[$chr];
        }

        // check again, if it's still not UTF-8
        // NOTE(review): this passes the non-UTF-8 $encoding as the first argument of encode() — confirm the intended direction.
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            // A falsy result (null or 0) falls through to the manual decoding below.
            if ($intlCode) {
                return $CHAR_CACHE[$cacheKey] = $intlCode;
            }
        }

        // Manual decode: unpack() yields a 1-based list of unsigned byte values.
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        $bytes = \unpack('C*', (string) self::substr($chr, 0, 4, 'CP850'));
        $lead = $bytes ? $bytes[1] : 0;

        // four-byte sequence
        if ($lead >= 0xF0 && isset($bytes[4])) {
            /** @noinspection UnnecessaryCastingInspection */
            return $CHAR_CACHE[$cacheKey] = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // three-byte sequence
        if ($lead >= 0xE0 && isset($bytes[3])) {
            /** @noinspection UnnecessaryCastingInspection */
            return $CHAR_CACHE[$cacheKey] = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // two-byte sequence
        if ($lead >= 0xC0 && isset($bytes[2])) {
            /** @noinspection UnnecessaryCastingInspection */
            return $CHAR_CACHE[$cacheKey] = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // single byte (or nothing readable: 0)
        return $CHAR_CACHE[$cacheKey] = $lead;
    }
4252
4253
    /**
     * Parses a query-style string into an array, written into the second (by-reference) parameter.
     *
     * WARNING: Unlike "parse_str()" this method never (re-)places variables in the current scope;
     *          the result is only delivered through $result.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Without mbstring, fall back to the native parser.
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return !empty($result);
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && !empty($result);
    }
4289
4290
    /**
     * Probes whether PCRE accepts the "u" (UTF-8) pattern modifier.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // An empty pattern always matches the empty subject (1) when the modifier is accepted;
        // a failure result (false) therefore means no support.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return $probe === 1;
    }
4302
4303
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is resolved to a code point as follows: a string of decimal digits is used as
     * the number itself, a string of hexadecimal digits is converted via hex_to_int(), anything
     * else is treated as a character and passed to ord().
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if ext-ctype is not installed
     *
     * @return string[] empty when a boundary is falsy or resolves to code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Shared resolver for both boundaries. The ctype_* functions are always given a string:
        // passing non-string arguments to them is deprecated since PHP 8.1.
        $toCodePoint = static function ($boundary): int {
            $asString = (string) $boundary;

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit($asString)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($asString)) {
                return (int) self::hex_to_int($asString);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            [
                self::class,
                'chr',
            ],
            \range($start, $end)
        );
    }
4359
4360
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // "%uXXXX" escapes are rewritten into hexadecimal HTML entities,
        // which the entity decoding in the loop below then resolves.
        $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscape, $str)) {
            $str = (string) \preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Repeat the decode pipeline until a pass no longer changes the string
        // (or stop after one pass when multi-decoding is disabled).
        do {
            $previous = $str;

            $asUtf8 = self::to_utf8($str);
            $withoutEntities = self::html_entity_decode($asUtf8, $flags);
            $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4407
4408
    /**
     * Filters a list of strings, dropping entries that are too short and/or blank.
     *
     * @param array $strings
     * @param bool  $removeEmptyValues <p>Drop entries that are empty after trim().</p>
     * @param int   $removeShortValues <p>Drop entries whose length is less than or equal to this value;
     *                                 null disables the length check.</p>
     *
     * @return array the remaining entries, re-indexed from 0
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = [];

        foreach ($strings as $candidate) {
            $isTooShort = $removeShortValues !== null
                          && self::strlen($candidate) <= $removeShortValues;
            if ($isTooShort) {
                continue;
            }

            $isBlank = $removeEmptyValues === true && \trim($candidate) === '';
            if ($isBlank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
4442
4443
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in $delimiter and always run with the "u" modifier, followed by $options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter the the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
    {
        // The option string "msr" is mapped to "ms" before it is appended to the pattern.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for a falsy delimiter
        $wrap = $delimiter ?: '/';

        $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4471
4472
    /**
     * Deprecated camelCase alias that forwards to "UTF8::remove_bom()".
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4487
4488
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked in turn against the start of the (possibly already
     * shortened) string, so more than one leading BOM can be stripped in a single call.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = self::strlen_in_byte($str);

        foreach (self::$BOM as $bomString => $bomByteLength) {
            // Only a BOM at byte offset 0 counts.
            if (self::strpos_in_byte($str, $bomString, 0) !== 0) {
                continue;
            }

            $stripped = self::substr_in_byte($str, $bomByteLength, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= $bomByteLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4517
4518
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Consecutive repetitions of each needle are collapsed into a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $needles = \is_string($what) === true ? [$what] : $what;

        // Anything that is neither a string nor an array leaves the input untouched.
        if (\is_array($needles) !== true) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($needles as $needle) {
            // Replace with capture group 1 (which always holds exactly one needle) rather than
            // with the raw needle: a needle such as '$0' or '\1' would otherwise be interpreted
            // as a back-reference by preg_replace().
            $str = (string) \preg_replace('/(' . \preg_quote($needle, '/') . ')+/', '$1', $str);
        }

        return $str;
    }
4541
4542
    /**
     * Strips HTML tags from the string by delegating to "strip_tags()".
     *
     * @param string $str
     * @param string $allowableTags [optional] <p>Tags which should not be stripped.
     *                              Default: empty string (strip everything)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $stripped = \strip_tags($str, $allowableTags);

        return $stripped;
    }
4556
4557
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "#" is the delimiter here, so no slash belongs in front of the first alternative:
        // with a leading "/" the CRLF alternative would only match "/\r\n" (and swallow that slash).
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4569
4570
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
     *
     * @param string $str
     * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00", "%1F", ...) of those characters.</p>
     * @param string $replacement
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        if ($url_encoded) {
            // Percent-encoding hex digits may be upper- or lower-case, hence the "i" modifier.
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
            $non_displayables[] = '/%7f/i'; // url encoded 127
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // Repeat until stable: removing one sequence can join its neighbours into a new one
        // (e.g. "%0%000" becomes "%00" after the first pass).
        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4603
4604
    /**
     * Strips the prefix $substring from the start of $str, when $str begins with it.
     *
     * @param string $str
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (!self::str_starts_with($str, $substring)) {
            return $str;
        }

        // Skip as many characters as the prefix is long and keep the rest.
        $prefixLength = self::strlen($substring, $encoding);

        return (string) self::substr($str, $prefixLength, null, $encoding);
    }
4626
4627
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The string is returned unchanged when it does not end with $substring
     * or when one of the lengths cannot be determined for $encoding.
     *
     * @param string $str
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (!self::str_ends_with($str, $substring)) {
            return $str;
        }

        $strLength = self::strlen($str, $encoding);
        $suffixLength = self::strlen($substring, $encoding);
        if (!\is_int($strLength) || !\is_int($suffixLength)) {
            return $str;
        }

        // the lengths were measured in $encoding, so the cut has to use the same encoding
        return (string) self::substr($str, 0, $strLength - $suffixLength, $encoding);
    }
4648
4649
    /**
     * Replace every occurrence of a single needle in a string.
     *
     * Delegates to self::str_replace() (case-sensitive) or
     * self::str_ireplace() (case-insensitive).
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4667
4668
    /**
     * Replace every occurrence of each of the given needles in a string.
     *
     * Delegates to self::str_replace() (case-sensitive) or
     * self::str_ireplace() (case-insensitive).
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4686
4687
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * With $processInvalidUtf8 enabled the string is re-encoded from UTF-8 to UTF-8 first.
     * For a non-empty replacement, invalid sequences are substituted with U+FFFD during
     * that step, so the final replace handles them together with diamonds that were
     * already part of the input. This also works for replacements that are longer than
     * one character, which "mb_substitute_character()" itself cannot express.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character.</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // always fallback via symfony polyfill
            $save = \mb_substitute_character();

            try {
                // "mb_substitute_character()" only accepts a code point or a keyword,
                // never an arbitrary string: drop invalid bytes for an empty replacement,
                // otherwise mark them with U+FFFD and let the final replace swap them
                \mb_substitute_character($replacementChar === '' ? 'none' : 0xFFFD);
                $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // the substitute character is a process-wide setting, always restore it
                \mb_substitute_character($save);
            }

            $str = \is_string($strTmp) ? $strTmp : '';
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
        return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
    }
4743
4744
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * Without $chars (default, null, false or an empty string) every trailing character
     * matching the Unicode classes \pZ or \pC is removed. Otherwise every
     * trailing character contained in $chars is removed; "0" counts as a regular
     * character to strip.
     *
     * @param string $str   <p>The string to be trimmed.</p>
     * @param mixed  $chars <p>Optional characters to be stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        // "0" and 0 are falsy in PHP but are valid characters to strip, so they must not select the default
        if ($chars === \INF || (!$chars && !\is_numeric($chars))) {
            $pattern = '[\pZ\pC]+$';
        } else {
            $chars = \preg_quote((string) $chars, '/');
            $pattern = "[{$chars}]+\$";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4768
4769
    /**
     * Build a regex fragment that matches any character of $s (plus the raw class content $class).
     *
     * Characters of up to two bytes are quoted for the "/" delimiter and added to a
     * bracket expression, a "-" is moved to its front, longer single characters are
     * added unquoted and everything else becomes a separate alternative. Results are
     * memoized per ($s, $class) pair for the lifetime of the process.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class [optional] <p>Raw content that is placed inside the bracket expression as-is.</p>
     *
     * @return string a bracket expression, or a non-capturing group of alternatives
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASSS_CACHE = [];

        // length-prefix $s so that e.g. ('\d', '') and ('', '\d') cannot share one cache entry
        $cacheKey = \strlen($s) . ':' . $s . $class;

        if (isset($RX_CLASSS_CACHE[$cacheKey])) {
            return $RX_CLASSS_CACHE[$cacheKey];
        }

        // index 0 collects the bracket expression content, further entries are alternatives
        $alternatives = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading "-" is a literal inside a bracket expression
                $alternatives[0] = '-' . $alternatives[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long
                $alternatives[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $alternatives[0] .= $char;
            } else {
                $alternatives[] = $char;
            }
        }

        if ($alternatives[0]) {
            $alternatives[0] = '[' . $alternatives[0] . ']';
        }

        if (\count($alternatives) === 1) {
            $return = $alternatives[0];
        } else {
            $return = '(?:' . \implode('|', $alternatives) . ')';
        }

        $RX_CLASSS_CACHE[$cacheKey] = $return;

        return $return;
    }
4817
4818
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes one "key - value" line per entry of the support table, wrapped in a "pre" element.
     */
    public static function showSupport()
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $html = '<pre>';
        foreach (self::$SUPPORT as $feature => $state) {
            $html .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }
        $html .= '</pre>';

        echo $html;
    }
4833
4834
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * An empty input yields an empty string. Any encoding other than "UTF-8" and
     * "CP850" is normalized (falling back to "UTF-8") before the code point is looked up.
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        if ($keepAsciiChars === true && self::is_ascii($char) === true) {
            return $char;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4863
4864
    /**
     * Replace each run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. For values
     *                          below 1 the string is returned unchanged.</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // "str_repeat()" rejects negative counts and an empty search string replaces nothing
        if ($tabLength <= 0) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
    }
4874
4875
    /**
     * Convert a string to an array of Unicode characters.
     *
     * For an array input every element is split on its own (with the same $length and
     * $cleanUtf8), so the result is an array of arrays. A $length below 1 or an empty
     * string yields an empty array.
     *
     * @param int|int[]|string|string[] $str       <p>The string to split into array.</p>
     * @param int                       $length    [optional] <p>Max character length of each array element.</p>
     * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string[] an array containing chunks of the string
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as $k => $v) {
                // forward every option, otherwise nested strings would never be cleaned
                $str[$k] = self::split($v, $length, $cleanUtf8);
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        // init
        $ret = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['pcre_utf8'] === true) {
            \preg_match_all('/./us', $str, $retArray);
            if (isset($retArray[0])) {
                $ret = $retArray[0];
            }
            unset($retArray);
        } else {
            // fallback: walk the bytes and group them by the UTF-8 lead byte pattern;
            // a lead byte without the matching continuation bytes is skipped

            $len = self::strlen_in_byte($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; $i++) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two-byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        $i++;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue $length characters together per chunk
            return \array_map(
                static function ($item) {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
4997
4998
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // both callbacks upper-case with the very same options
        $toUpper = function ($chunk) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength) {
            return self::strtoupper($chunk, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        };

        $camelized = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // leading dashes / underscores are dropped
        $camelized = (string) \preg_replace('/^[-_]+/', '', $camelized);

        // remove each separator run and upper-case the character that follows it (if any)
        $camelized = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            function ($match) use ($toUpper) {
                return isset($match[1]) ? $toUpper($match[1]) : '';
            },
            $camelized
        );

        // upper-case the character that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            function ($match) use ($toUpper) {
                return $toUpper($match[0]);
            },
            $camelized
        );
    }
5038
5039
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * Whitespace is collapsed first; the parts separated by spaces are
     * processed before the parts separated by dashes.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $bySpace = self::str_capitalize_name_helper(self::collapse_whitespace($str), ' ');

        return self::str_capitalize_name_helper($bySpace, '-');
    }
5055
5056
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every part, unless
     * the part is one of the special-case names, starts with one of the special-case prefixes,
     * or (only for the "-" delimiter) starts with one of the special-case names.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the parts; an empty delimiter yields an empty string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // "explode()" cannot split by an empty delimiter
        if ($delimiter === '') {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // beginnings that keep a part lower-cased; the names only count for the "-" delimiter
        $beginnings = $specialCases['prefixes'];
        if ($delimiter === '-') {
            $beginnings = \array_merge($specialCases['names'], $beginnings);
        }

        $namesArray = \explode($delimiter, $names);

        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $keepAsIs = false;
            foreach ($beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $keepAsIs = true;

                    break;
                }
            }

            if ($keepAsIs) {
                continue;
            }

            $namesArray[$index] = self::str_upper_first($name);
        }

        return \implode($delimiter, $namesArray);
    }
5144
5145
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * An empty $haystack or $needle always results in false. For backward compatibility
     * a non-boolean $caseSensitive is used as the encoding instead.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        $position = $caseSensitive
            ? self::strpos($haystack, $needle, 0, $encoding)
            : self::stripos($haystack, $needle, 0, $encoding);

        return $position !== false;
    }
5174
5175
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty $haystack or an empty list of needles always results in false.
     * For backward compatibility a non-boolean $caseSensitive is used as the encoding instead.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        if (!\is_bool($caseSensitive)) {
            $encoding = (string) $caseSensitive;
        }

        foreach ($needles as $needle) {
            $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
            if ($found === false) {
                // one miss is enough
                return false;
            }
        }

        return true;
    }
5210
5211
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty list of needles always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool
     *               Whether or not $str contains $needle
     */
    public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
            if ($found === true) {
                // the first hit decides
                return true;
            }
        }

        return false;
    }
5238
5239
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for self::str_delimit() with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5253
5254
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * Uppercase characters are detected via the Unicode property "Lu", so non-ASCII
     * letters are handled as well. The delimiter is inserted literally.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8                     [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $tryToKeepStringLength         [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = self::trim($str);

        // mark every uppercase letter inside a word with a dash
        $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $str);

        $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // a callback keeps "$1" or "\1" inside the delimiter from being read as a back-reference
        return (string) \preg_replace_callback(
            '/[-_\s]+/u',
            static function () use ($delimiter) {
                return $delimiter;
            },
            $str
        );
    }
5287
5288
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
     * "mb_detect_encoding()" with a fixed detection order and finally a round trip
     * through "iconv()" for every encoding of the "encodings" data list.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            $isUtf16 = self::is_utf16($str, false);
            if ($isUtf16 === 1) {
                return 'UTF-16LE';
            }
            if ($isUtf16 === 2) {
                return 'UTF-16BE';
            }

            $isUtf32 = self::is_utf32($str, false);
            if ($isUtf32 === 1) {
                return 'UTF-32LE';
            }
            if ($isUtf32 === 2) {
                return 'UTF-32BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        $detectOrder = [
            'ISO-8859-1',
            'ISO-8859-2',
            'ISO-8859-3',
            'ISO-8859-4',
            'ISO-8859-5',
            'ISO-8859-6',
            'ISO-8859-7',
            'ISO-8859-8',
            'ISO-8859-9',
            'ISO-8859-10',
            'ISO-8859-13',
            'ISO-8859-14',
            'ISO-8859-15',
            'ISO-8859-16',
            'WINDOWS-1251',
            'WINDOWS-1252',
            'WINDOWS-1254',
            'CP932',
            'CP936',
            'CP950',
            'CP866',
            'CP850',
            'CP51932',
            'CP50220',
            'CP50221',
            'CP50222',
            'ISO-2022-JP',
            'ISO-2022-KR',
            'JIS',
            'JIS-ms',
            'EUC-CN',
            'EUC-JP',
        ];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $encoding = \mb_detect_encoding($str, $detectOrder, true);
            if ($encoding) {
                return $encoding;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            // the data lookup may fail with false; keep the property an array so the loop stays valid
            $encodings = self::getData('encodings');
            self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
        }

        foreach (self::$ENCODINGS as $encodingTmp) {
            // INFO: //IGNORE but still throw notice
            // an encoding fits when converting the string to itself changes nothing
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            if ((string) @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
                return $encodingTmp;
            }
        }

        return false;
    }
5413
5414
    /**
     * Tell whether $haystack terminates with $needle (case-sensitive, byte-wise comparison).
     *
     * An empty haystack or an empty needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needleBytes = \strlen($needle);
        if ($needleBytes === 0 || \strlen($haystack) === 0) {
            return false;
        }

        // compare the trailing bytes of the haystack with the needle
        $tail = \substr($haystack, -$needleBytes);

        return $tail === $needle;
    }
5430
5431
    /**
     * Tell whether the string ends with at least one of the given substrings.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool true as soon as one substring matches the end of $str, false otherwise
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_ends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5455
5456
    /**
     * Guarantee that the string starts with $substring, prepending it when missing.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        if (self::str_starts_with($str, $substring)) {
            return $str;
        }

        return $substring . $str;
    }
5473
5474
    /**
     * Guarantee that the string ends with $substring, appending it when missing.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        if (self::str_ends_with($str, $substring)) {
            return $str;
        }

        return $str . $substring;
    }
5490
5491
    /**
     * Make an identifier-like string readable: every '_id' is removed, the
     * remaining underscores become spaces, the result is trimmed and its
     * first character is upper-cased.
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' has to be handled before the plain '_'
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $humanized = self::str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        return self::ucfirst(self::trim($humanized));
    }
5515
5516
    /**
     * Tell whether $haystack terminates with $needle, ignoring case.
     *
     * An empty haystack or an empty needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // the tail is cut by the byte-length of the needle and then compared case-insensitively
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5536
5537
    /**
     * Tell whether the string ends with at least one of the given substrings.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool true as soon as one substring matches the end of $str, false otherwise
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5561
5562
    /**
     * Find the index of the first occurrence of $needle, ignoring case.
     *
     * Thin wrapper that forwards all arguments to self::stripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5584
5585
    /**
     * Find the index of the last occurrence of $needle, ignoring case.
     *
     * Thin wrapper that forwards all arguments to self::strripos().
     * Offsets may be negative to count from the last character in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5608
5609
    /**
     * Find the index of the first occurrence of $needle (case-sensitive).
     *
     * Thin wrapper that forwards all arguments to self::strpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5631
5632
    /**
     * Find the index of the last occurrence of $needle (case-sensitive).
     *
     * Thin wrapper that forwards all arguments to self::strrpos().
     * Offsets may be negative to count from the last character in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5655
5656
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the length of the string, the string is
     * returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
    {
        // self::strlen() may return false; it must not reach the int-typed $length of self::substr()
        $len = (int) self::strlen($str, $encoding);

        if ($index > $len) {
            return $str;
        }

        // self::substr() may return false; normalise both halves to strings before concatenating
        $start = (string) self::substr($str, 0, $index, $encoding);
        $end = (string) self::substr($str, $index, $len, $encoding);

        return $start . $substring . $end;
    }
5679
5680
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * The search terms are quoted and matched via PCRE with the "u" and "i"
     * modifiers. The replacement is inserted literally: "$" and "\" are
     * escaped, so sequences such as "$1" or "\1" are not treated as
     * backreferences. An empty search term never matches.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement string, or an array of replacement strings.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // a pattern that can never match, so an empty needle replaces nothing
                $patterns[] = '/^(?<=.)$/';
            } else {
                $patterns[] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // escape "\" and "$" so preg_replace() inserts the replacement text verbatim
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5724
5725
    /**
     * Replaces $search from the beginning of string with $replacement (case-insensitive).
     *
     * The prefix check is delegated to self::str_istarts_with() instead of
     * the byte-wise \stripos(), which only folds ASCII letters, and the
     * matched prefix is removed by character count.
     *
     * If $search is empty, $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        if (self::str_istarts_with($str, $search)) {
            // cut by characters: case variants of one character can differ in byte-length
            $searchLength = (int) self::strlen($search, 'UTF-8');

            return $replacement . (string) self::substr($str, $searchLength, null, 'UTF-8');
        }

        return $str;
    }
5756
5757
    /**
     * Replaces $search from the ending of string with $replacement (case-insensitive).
     *
     * The suffix check is delegated to self::str_iends_with(), so a $search
     * that is longer than $str simply does not match instead of handing an
     * out-of-range offset to \stripos().
     *
     * If $search is empty, $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        if (self::str_iends_with($str, $search)) {
            // str_iends_with() compares the last strlen($search) bytes, so the same amount is cut here
            $str = \substr($str, 0, -\strlen($search)) . $replacement;
        }

        return $str;
    }
5788
5789
    /**
     * Tell whether $haystack begins with $needle, ignoring case.
     *
     * An empty haystack or an empty needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // a match at position 0 means the haystack starts with the needle
        return self::stripos($haystack, $needle) === 0;
    }
5809
5810
    /**
     * Tell whether the string begins with at least one of the given substrings.
     *
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool true as soon as one substring matches the start of $str, false otherwise
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5838
5839
    /**
     * Gets the substring after the first occurrence of a separator (case-insensitive search).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if (
            $separator === ''
            ||
            $str === ''
        ) {
            return '';
        }

        // search with the same encoding that is used for the slice below
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5870
5871
    /**
     * Gets the substring after the last occurrence of a separator (case-insensitive search).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if (
            $separator === ''
            ||
            $str === ''
        ) {
            return '';
        }

        // search with the same encoding that is used for the slice below
        $offset = self::str_iindex_last($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5902
5903
    /**
     * Gets the substring before the first occurrence of a separator (case-insensitive search).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if (
            $separator === ''
            ||
            $str === ''
        ) {
            return '';
        }

        // search with the same encoding that is used for the slice below
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5929
5930
    /**
     * Gets the substring before the last occurrence of a separator (case-insensitive search).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if (
            $separator === ''
            ||
            $str === ''
        ) {
            return '';
        }

        // search with the same encoding that is used for the slice below
        $offset = self::str_iindex_last($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5956
5957
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The lookup is delegated to self::stristr(); a failed lookup, an empty
     * $str or an empty $needle yields an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
5989
5990
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The lookup is delegated to self::strrichr(); a failed lookup, an empty
     * $str or an empty $needle yields an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6017
6018
    /**
     * Returns the last $n characters of the string.
     *
     * A non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n < 1) {
            return '';
        }

        // a negative offset counts from the end of the string
        $tail = self::substr($str, -$n, null, $encoding);
        if ($tail === false) {
            return '';
        }

        return $tail;
    }
6037
6038
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged.
     * Otherwise it is cut so that the kept part plus $strAddOn is $length
     * characters long. If $strAddOn alone is longer than $length, nothing of
     * $str is kept and only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($length <= 0) {
            return '';
        }

        if (self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // never pass a negative length: it would cut from the end of the string instead
        $keep = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        return (string) self::substr($str, 0, $keep, $encoding) . $strAddOn;
    }
6064
6065
    /**
     * Limit the number of characters in a string without cutting inside a word.
     *
     * A string that already fits into $length is returned unchanged.
     * Otherwise the string is cut back to the last complete word within the
     * limit and $strAddOn is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if (self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the limit falls exactly on a space: cut right before it
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
        }

        $truncated = (string) self::substr($str, 0, $length, $encoding);

        // drop the last (possibly incomplete) word
        $words = \explode(' ', $truncated);
        \array_pop($words);
        $wholeWords = \implode(' ', $words);

        if ($wholeWords !== '') {
            return $wholeWords . $strAddOn;
        }

        // no complete word is left: fall back to a hard cut
        return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
    }
6106
6107
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start,
     * stopping at the first difference.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // the prefix cannot be longer than the shorter string
        $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        $prefix = '';
        $pos = 0;
        while ($pos < $limit) {
            $char = self::substr($str, $pos, 1, $encoding);
            if ($char !== self::substr($otherStr, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$pos;
        }

        return $prefix;
    }
6133
6134
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest common substring, or an empty string if there is none
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem
        $strLength = (int) self::strlen($str, $encoding);
        $otherLength = (int) self::strlen($otherStr, $encoding);

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        $len = 0; // length of the best match found so far
        $end = 0; // index (in $str) right after the best match
        $table = \array_fill(
            0,
            $strLength + 1,
            \array_fill(0, $otherLength + 1, 0)
        );

        for ($i = 1; $i <= $strLength; $i++) {
            // the character of $str does not depend on $j: fetch it once per row
            $strChar = self::substr($str, $i - 1, 1, $encoding);

            for ($j = 1; $j <= $otherLength; $j++) {
                $otherChar = self::substr($otherStr, $j - 1, 1, $encoding);

                if ($strChar === $otherChar) {
                    // extend the common run that ended at the previous pair of characters
                    $table[$i][$j] = $table[$i - 1][$j - 1] + 1;
                    if ($table[$i][$j] > $len) {
                        $len = $table[$i][$j];
                        $end = $i;
                    }
                } else {
                    $table[$i][$j] = 0;
                }
            }
        }

        $returnTmp = self::substr($str, $end - $len, $len, $encoding);

        return $returnTmp === false ? '' : $returnTmp;
    }
6185
6186
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are compared character by character from the end,
     * stopping at the first difference.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // the suffix cannot be longer than the shorter string
        $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        $suffix = '';
        $fromEnd = 1;
        while ($fromEnd <= $limit) {
            // a negative offset addresses the characters from the end of the string
            $char = self::substr($str, -$fromEnd, 1, $encoding);
            if ($char !== self::substr($otherStr, -$fromEnd, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$fromEnd;
        }

        return $suffix;
    }
6212
6213
    /**
6214
     * Returns true if $str matches the supplied pattern, false otherwise.
6215
     *
6216
     * @param string $str     <p>The input string.</p>
6217
     * @param string $pattern <p>Regex pattern to match against.</p>
6218
     *
6219
     * @return bool whether or not $str matches the pattern
6220
     */
6221
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The pattern is wrapped in "/" delimiters and matched in UTF-8 mode.
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error;
        // only an actual match is reported as true.
        return (bool) \preg_match($regex, $str);
    }
6229
6230
    /**
6231
     * Returns whether or not a character exists at an index. Offsets may be
6232
     * negative to count from the last character in the string. Implements
6233
     * part of the ArrayAccess interface.
6234
     *
6235
     * @param string $str      <p>The input string.</p>
6236
     * @param int    $offset   <p>The index to check.</p>
6237
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6238
     *
6239
     * @return bool whether or not the index exists
6240
     */
6241
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = self::strlen($str, $encoding);

        // A negative offset counts back from the end, so its magnitude may be
        // as large as the length itself (-1 is the last character).
        if ($offset < 0) {
            return $length >= \abs($offset);
        }

        // A non-negative offset is zero-based and must be below the length.
        return $offset < $length;
    }
6252
6253
    /**
6254
     * Returns the character at the given index. Offsets may be negative to
6255
     * count from the last character in the string. Implements part of the
6256
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
6257
     * does not exist.
6258
     *
6259
     * @param string $str      <p>The input string.</p>
6260
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
6261
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6262
     *
6263
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
6264
     *
6265
     * @return string the character at the specified index
6266
     */
6267
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the same encoding that is used to read the
        // character below, so the bounds check and the lookup agree.
        $length = self::strlen($str, $encoding);

        if (
            // non-negative index: must be strictly below the length
            ($index >= 0 && $length <= $index)
            ||
            // negative index: its magnitude must not exceed the length
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6282
6283
    /**
6284
     * Pad a UTF-8 string to given length with another string.
6285
     *
6286
     * @param string $str        <p>The input string.</p>
6287
     * @param int    $pad_length <p>The length of return string.</p>
6288
     * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
6289
     * @param int    $pad_type   [optional] <p>
6290
     *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
6291
     *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
6292
     *                           </p>
6293
     * @param string $encoding   [optional] <p>Default: UTF-8</p>
6294
     *
6295
     * @return string returns the padded string
6296
     */
6297
    public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = \STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Besides the STR_PAD_* integer constants, the string aliases
        // 'left', 'right' and 'both' are accepted and mapped to them here.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        $str_length = self::strlen($str, $encoding);

        if (
            $pad_length > 0
            &&
            $pad_length >= $str_length
        ) {
            $ps_length = self::strlen($pad_string, $encoding);

            // An empty (or unmeasurable) pad string cannot fill anything, and
            // its length is used as a divisor below: return the input as-is.
            if ($ps_length === false || $ps_length === 0) {
                return $str;
            }

            // number of characters that have to be added in total
            $diff = ($pad_length - $str_length);

            switch ($pad_type) {
                case \STR_PAD_LEFT:
                    // repeat the pad string often enough, then cut to the exact length
                    $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                    $pre = (string) self::substr($pre, 0, $diff, $encoding);
                    $post = '';

                    break;

                case \STR_PAD_BOTH:
                    // an odd remainder goes to the right side (floor left, ceil right)
                    $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                    $pre = (string) self::substr($pre, 0, (int) \floor($diff / 2), $encoding);
                    $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                    $post = (string) self::substr($post, 0, (int) \ceil($diff / 2), $encoding);

                    break;

                case \STR_PAD_RIGHT:
                default:
                    $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                    $post = (string) self::substr($post, 0, $diff, $encoding);
                    $pre = '';
            }

            return $pre . $str . $post;
        }

        // requested length is not larger than the string: nothing to pad
        return $str;
    }
6356
6357
    /**
6358
     * Returns a new string of a given length such that both sides of the
6359
     * string are padded. Alias for pad() with a $padType of 'both'.
6360
     *
6361
     * @param string $str
6362
     * @param int    $length   <p>Desired string length after padding.</p>
6363
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6364
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6365
     *
6366
     * @return string string with padding applied
6367
     */
6368
    public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // total number of characters that are missing to reach $length
        $missing = $length - self::strlen($str, $encoding);

        // an odd remainder ends up on the right side
        $left = (int) \floor($missing / 2);
        $right = (int) \ceil($missing / 2);

        return self::apply_padding($str, $left, $right, $padStr, $encoding);
    }
6374
6375
    /**
6376
     * Returns a new string of a given length such that the beginning of the
6377
     * string is padded. Alias for pad() with a $padType of 'left'.
6378
     *
6379
     * @param string $str
6380
     * @param int    $length   <p>Desired string length after padding.</p>
6381
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6382
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6383
     *
6384
     * @return string string with left padding
6385
     */
6386
    public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure the input in the requested encoding (as str_pad_both() does),
        // so the amount of padding matches the encoding used to apply it.
        $missing = $length - self::strlen($str, $encoding);

        return self::apply_padding($str, $missing, 0, $padStr, $encoding);
    }
6390
6391
    /**
6392
     * Returns a new string of a given length such that the end of the string
6393
     * is padded. Alias for pad() with a $padType of 'right'.
6394
     *
6395
     * @param string $str
6396
     * @param int    $length   <p>Desired string length after padding.</p>
6397
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6398
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6399
     *
6400
     * @return string string with right padding
6401
     */
6402
    public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // Measure the input in the requested encoding (as str_pad_both() does),
        // so the amount of padding matches the encoding used to apply it.
        $missing = $length - self::strlen($str, $encoding);

        return self::apply_padding($str, 0, $missing, $padStr, $encoding);
    }
6406
6407
    /**
6408
     * Repeat a string.
6409
     *
6410
     * @param string $str        <p>
6411
     *                           The string to be repeated.
6412
     *                           </p>
6413
     * @param int    $multiplier <p>
6414
     *                           Number of times the input string should be
6415
     *                           repeated.
6416
     *                           </p>
6417
     *                           <p>
6418
     *                           multiplier has to be greater than or equal to 0.
6419
     *                           If the multiplier is set to 0, the function
6420
     *                           will return an empty string.
6421
     *                           </p>
6422
     *
6423
     * @return string the repeated string
6424
     */
6425
    public static function str_repeat(string $str, int $multiplier): string
    {
        // The input is passed through self::filter() first; the native
        // str_repeat() then does the repetition.
        return \str_repeat(self::filter($str), $multiplier);
    }
6431
6432
    /**
6433
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
6434
     *
6435
     * Replace all occurrences of the search string with the replacement string
6436
     *
6437
     * @see http://php.net/manual/en/function.str-replace.php
6438
     *
6439
     * @param mixed $search  <p>
6440
     *                       The value being searched for, otherwise known as the needle.
6441
     *                       An array may be used to designate multiple needles.
6442
     *                       </p>
6443
     * @param mixed $replace <p>
6444
     *                       The replacement value that replaces found search
6445
     *                       values. An array may be used to designate multiple replacements.
6446
     *                       </p>
6447
     * @param mixed $subject <p>
6448
     *                       The string or array being searched and replaced on,
6449
     *                       otherwise known as the haystack.
6450
     *                       </p>
6451
     *                       <p>
6452
     *                       If subject is an array, then the search and
6453
     *                       replace is performed with every entry of
6454
     *                       subject, and the return value is an array as
6455
     *                       well.
6456
     *                       </p>
6457
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
6458
     *
6459
     * @return mixed this function returns a string or an array with the replaced values
6460
     */
6461
    public static function str_replace($search, $replace, $subject, int &$count = null)
    {
        // Plain delegation to the native function; $count is filled by reference.
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6465
6466
    /**
6467
     * Replaces $search from the beginning of string with $replacement.
6468
     *
6469
     * @param string $str         <p>The input string.</p>
6470
     * @param string $search      <p>The string to search for.</p>
6471
     * @param string $replacement <p>The replacement.</p>
6472
     *
6473
     * @return string string after the replacements
6474
     */
6475
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // With an empty search term the replacement is appended to the input
        // (for an empty input this is just the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        $searchBytes = \strlen($search);

        // Byte-wise prefix comparison: only replace when $str starts with $search.
        if (\strncmp($str, $search, $searchBytes) === 0) {
            return $replacement . \substr($str, $searchBytes);
        }

        return $str;
    }
6497
6498
    /**
6499
     * Replaces $search from the ending of string with $replacement.
6500
     *
6501
     * @param string $str         <p>The input string.</p>
6502
     * @param string $search      <p>The string to search for.</p>
6503
     * @param string $replacement <p>The replacement.</p>
6504
     *
6505
     * @return string string after the replacements
6506
     */
6507
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // With an empty search term the replacement is appended to the input
        // (for an empty input this is just the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        $searchBytes = \strlen($search);

        // A search term longer than the input can never be its suffix. Checking
        // this first avoids handing an out-of-range offset to the comparison.
        if ($searchBytes > \strlen($str)) {
            return $str;
        }

        // Byte-wise comparison of the last $searchBytes bytes against $search.
        if (\substr_compare($str, $search, -$searchBytes) === 0) {
            return \substr($str, 0, -$searchBytes) . $replacement;
        }

        return $str;
    }
6529
6530
    /**
6531
     * Replace the first "$search"-term with the "$replace"-term.
6532
     *
6533
     * @param string $search
6534
     * @param string $replace
6535
     * @param string $subject
6536
     *
6537
     * @return string
6538
     */
6539
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $position = self::strpos($subject, $search);

        // Search term not found: the subject is returned untouched.
        if ($position === false) {
            return $subject;
        }

        // Replace exactly the matched stretch, starting at the first occurrence.
        $searchLength = self::strlen($search);

        return self::substr_replace($subject, $replace, $position, $searchLength);
    }
6548
6549
    /**
6550
     * Replace the last "$search"-term with the "$replace"-term.
6551
     *
6552
     * @param string $search
6553
     * @param string $replace
6554
     * @param string $subject
6555
     *
6556
     * @return string
6557
     */
6558
    public static function str_replace_last(string $search, string $replace, string $subject): string
    {
        $position = self::strrpos($subject, $search);

        // Search term not found: the subject is returned untouched.
        if ($position === false) {
            return $subject;
        }

        // Replace exactly the matched stretch, starting at the last occurrence.
        $searchLength = self::strlen($search);

        return self::substr_replace($subject, $replace, $position, $searchLength);
    }
6567
6568
    /**
6569
     * Shuffles all the characters in the string.
6570
     *
6571
     * PS: uses random algorithm which is weak for cryptography purposes
6572
     *
6573
     * @param string $str <p>The input string</p>
6574
     *
6575
     * @return string the shuffled string
6576
     */
6577
    public static function str_shuffle(string $str): string
    {
        // Nothing to shuffle. Without this guard range(0, -1) would yield
        // [0, -1] and trigger two pointless out-of-range substr() calls.
        if ($str === '') {
            return '';
        }

        // One index per character, brought into random order.
        $indexes = \range(0, self::strlen($str) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // Rebuild the string by picking the characters in the shuffled order.
        $shuffledStr = '';
        foreach ($indexes as $i) {
            $shuffledStr .= self::substr($str, $i, 1);
        }

        return $shuffledStr;
    }
6590
6591
    /**
6592
     * Returns the substring beginning at $start, and up to, but not including
6593
     * the index specified by $end. If $end is omitted, the function extracts
6594
     * the remaining string. If $end is negative, it is computed from the end
6595
     * of the string.
6596
     *
6597
     * @param string $str
6598
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
6599
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
6600
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6601
     *
6602
     * @return false|string
6603
     *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
6604
     *                     characters long, <b>FALSE</b> will be returned.</p>
6605
     */
6606
    public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
    {
        // The string is measured in the same encoding that substr() uses below,
        // so the computed length and the extraction agree.
        if ($end === null) {
            // no end given: take everything from $start on
            $length = self::strlen($str, $encoding);
        } elseif ($end >= 0 && $end <= $start) {
            // the end lies at or before the start: empty slice
            return '';
        } elseif ($end < 0) {
            // a negative end is counted from the end of the string
            $length = self::strlen($str, $encoding) + $end - $start;
        } else {
            $length = $end - $start;
        }

        return self::substr($str, $start, $length, $encoding);
    }
6620
6621
    /**
6622
     * Convert a string to e.g.: "snake_case"
6623
     *
6624
     * @param string $str
6625
     * @param string $encoding [optional] <p>Default: UTF-8</p>
6626
     *
6627
     * @return string string in snake_case
6628
     */
6629
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        $normalized = \str_replace('-', '_', self::normalize_whitespace($str));

        // Put a "_" in front of every character matched by the class below;
        // digits additionally get a trailing "_", everything else is lowercased.
        // NOTE(review): the "|" inside the character class is matched literally,
        // it is not an alternation - confirm whether that is intended.
        $marked = (string) \preg_replace_callback(
            '/([\d|A-Z])/u',
            static function ($hit) use ($encoding) {
                $char = $hit[1];

                // a single digit survives the int round-trip unchanged
                $isDigit = (string) (int) $char === $char;

                return $isDigit
                    ? '_' . $char . '_'
                    : '_' . self::strtolower($char, $encoding);
            },
            $normalized
        );

        $patterns = [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ];
        $replacements = ['_', '', '_'];

        $collapsed = (string) \preg_replace($patterns, $replacements, $marked);

        // first strip leading & trailing "_", then leading & trailing whitespace
        return self::trim(self::trim($collapsed, '_'));
    }
6667
6668
    /**
6669
     * Sort all characters according to code points.
6670
     *
6671
     * @param string $str    <p>A UTF-8 string.</p>
6672
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
6673
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
6674
     *
6675
     * @return string string of sorted characters
6676
     */
6677
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice drops repeated values (a value can only be one key)
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        // sort by value, ascending unless the reverse order was requested
        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
6693
6694
    /**
6695
     * alias for "UTF8::split()"
6696
     *
6697
     * @see UTF8::split()
6698
     *
6699
     * @param string|string[] $str
6700
     * @param int             $len
6701
     *
6702
     * @return string[]
6703
     */
6704
    public static function str_split($str, int $len = 1): array
    {
        // Pure alias: all of the work is done by self::split().
        $chunks = self::split($str, $len);

        return $chunks;
    }
6708
6709
    /**
6710
     * Splits the string with the provided regular expression, returning an
6711
     * array of strings. An optional integer $limit will truncate the
6712
     * results.
6713
     *
6714
     * @param string $str
6715
     * @param string $pattern <p>The delimiter with which to split the string (escaped via preg_quote(), so it is matched literally).</p>
6716
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
6717
     *
6718
     * @return string[] an array of strings
6719
     */
6720
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        // a limit of zero asks for no results at all
        if ($limit === 0) {
            return [];
        }

        // nothing to split on: the input is the only piece
        if ($pattern === '') {
            return [$str];
        }

        // preg_split() puts the unsplit remainder into the last slot when a
        // limit is given, so ask for one extra piece and drop it afterwards.
        $limit = $limit > 0 ? $limit + 1 : -1;

        // $pattern is escaped, i.e. it is matched literally
        $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);
        if ($pieces === false) {
            return [];
        }

        // the extra slot was actually filled: it holds the remainder, drop it
        if ($limit > 0 && \count($pieces) === $limit) {
            \array_pop($pieces);
        }

        return $pieces;
    }
6752
6753
    /**
6754
     * Check if the string starts with the given substring.
6755
     *
6756
     * @param string $haystack <p>The string to search in.</p>
6757
     * @param string $needle   <p>The substring to search for.</p>
6758
     *
6759
     * @return bool
6760
     */
6761
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // an empty haystack or an empty needle never counts as a match
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // byte-wise comparison of the first strlen($needle) bytes
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
6773
6774
    /**
6775
     * Returns true if the string begins with any of $substrings, false otherwise.
6776
     *
6777
     * - case-sensitive
6778
     *
6779
     * @param string $str        <p>The input string.</p>
6780
     * @param array  $substrings <p>Substrings to look for.</p>
6781
     *
6782
     * @return bool whether or not $str starts with $substring
6783
     */
6784
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // without an input string or without candidates nothing can match
        if ($str === '' || empty($substrings)) {
            return false;
        }

        // the first candidate that is a prefix of $str settles the question
        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6802
6803
    /**
6804
     * Gets the substring after the first occurrence of a separator.
6805
     *
6806
     * @param string $str       <p>The input string.</p>
6807
     * @param string $separator <p>The string separator.</p>
6808
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
6809
     *
6810
     * @return string
6811
     */
6812
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_index_first($str, $separator);
        if ($position === false) {
            // separator does not occur in the string
            return '';
        }

        // skip the separator itself, then take everything up to the end
        $start = $position + self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
6834
6835
    /**
6836
     * Gets the substring after the last occurrence of a separator.
6837
     *
6838
     * @param string $str       <p>The input string.</p>
6839
     * @param string $separator <p>The string separator.</p>
6840
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
6841
     *
6842
     * @return string
6843
     */
6844
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_index_last($str, $separator);
        if ($position === false) {
            // separator does not occur in the string
            return '';
        }

        // skip the separator itself, then take everything up to the end
        $start = $position + self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
6866
6867
    /**
6868
     * Gets the substring before the first occurrence of a separator.
6869
     *
6870
     * @param string $str       <p>The input string.</p>
6871
     * @param string $separator <p>The string separator.</p>
6872
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
6873
     *
6874
     * @return string
6875
     */
6876
    public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_index_first($str, $separator);
        if ($position === false) {
            // separator does not occur in the string
            return '';
        }

        // everything from the start up to (not including) the separator
        return (string) self::substr($str, 0, $position, $encoding);
    }
6898
6899
    /**
6900
     * Gets the substring before the last occurrence of a separator.
6901
     *
6902
     * @param string $str       <p>The input string.</p>
6903
     * @param string $separator <p>The string separator.</p>
6904
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
6905
     *
6906
     * @return string
6907
     */
6908
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_index_last($str, $separator);
        if ($position === false) {
            // separator does not occur in the string
            return '';
        }

        // everything from the start up to (not including) the last separator
        return (string) self::substr($str, 0, $position, $encoding);
    }
6930
6931
    /**
6932
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
6933
     *
6934
     * @param string $str          <p>The input string.</p>
6935
     * @param string $needle       <p>The string to look for.</p>
6936
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6937
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
6938
     *
6939
     * @return string
6940
     */
6941
    public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strstr($str, $needle, $beforeNeedle, $encoding);

        // a failed lookup (false) is reported as an empty string
        return $part === false ? '' : $part;
    }
6963
6964
    /**
6965
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
6966
     *
6967
     * @param string $str          <p>The input string.</p>
6968
     * @param string $needle       <p>The string to look for.</p>
6969
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6970
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
6971
     *
6972
     * @return string
6973
     */
6974
    public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrchr($str, $needle, $beforeNeedle, $encoding);

        // a failed lookup (false) is reported as an empty string
        return $part === false ? '' : $part;
    }
6991
6992
    /**
     * Wraps "$str" in "$substring": the substring is placed once in front of
     * and once behind the input.
     *
     * @param string $str       <p>The string to wrap.</p>
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return $substring . $str . $substring;
    }
7004
7005
    /**
     * Returns a (by default trimmed) string with the first letter of each word capitalized.
     *
     * The input is split via UTF8::str_to_words(); every resulting part that is
     * not listed in "$ignore" is lower-cased and then gets an upper-case first
     * character. Parts listed in "$ignore" are copied unchanged.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($useTrimFirst === true) {
            $str = self::trim($str);
        }

        // An empty or missing ignore list means: capitalize everything.
        $hasIgnoreList = !empty($ignore);

        $titleized = '';
        foreach (self::str_to_words($str) as $word) {
            if ($hasIgnoreList && \in_array($word, $ignore, true)) {
                // Ignored words are kept exactly as they were written.
                $titleized .= $word;

                continue;
            }

            $lowered = self::strtolower(
                $word,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );

            $titleized .= self::str_upper_first(
                $lowered,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $titleized;
    }
7060
7061
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * Processing steps:
     * 1. trim the input; if it contains no lower-case character it is lower-cased first,
     * 2. the main pass keeps URLs / paths, lower-cases "small words", upper-cases the
     *    first letter of plain words and keeps words with internal capitals,
     * 3. small words are capitalized again at the start / end of the title or of an
     *    inserted subphrase and inside hyphenated compounds.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The entries are appended to the
     *                         built-in small-word list and inserted into the regular expressions
     *                         as-is (not escaped), so regex syntax applies.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = self::trim($str);

        // An all-caps title carries no casing information, so normalize it first.
        // The caller's encoding is passed on, like in every other case conversion below.
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // The main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            function ($matches) use ($encoding) {
                // Preserve leading underscore
                $result = $matches[1];

                // Groups that did not take part in the match are empty strings.
                // Compare against '' explicitly: a truthiness check would treat a
                // matched "0" (possible via $ignore) as "not matched" and drop it.
                if ($matches[2] !== '') {
                    // Preserve URLs, domains, emails and file paths
                    $result .= $matches[2];
                } elseif ($matches[3] !== '') {
                    // Lower-case small words
                    $result .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4] !== '') {
                    // Capitalize word w/o internal caps
                    $result .= self::str_upper_first($matches[4], $encoding);
                } else {
                    // Preserve other kinds of word (iPhone)
                    $result .= $matches[5];
                }

                // Preserve trailing underscore
                $result .= $matches[6];

                return $result;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            function ($matches) use ($encoding) {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            function ($matches) use ($encoding) {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            function ($matches) use ($encoding) {
                return self::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for an ellipsis character although the
        // inline regex comment talks about a hyphen - kept unchanged, please confirm the intent.
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            function ($matches) use ($encoding) {
                return $matches[1] . self::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7205
7206
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of "$str" are written as base-2 digits (8 per byte, most
     * significant bit first) and leading zeros of the whole result are removed,
     * e.g. "a" (0x61) becomes "1100001".
     *
     * The conversion is done byte by byte instead of via \base_convert(), because
     * \base_convert() works on native numbers and silently loses precision for
     * inputs longer than a few bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the binary digits; "0" for an empty string or a string that only consists of NUL bytes
     */
    public static function str_to_binary(string $str): string
    {
        $bytes = \unpack('C*', $str);
        if ($bytes === false || $bytes === []) {
            return '0';
        }

        $bits = '';
        foreach ($bytes as $byte) {
            // always 8 digits per byte, otherwise inner zeros would get lost
            $bits .= \sprintf('%08b', $byte);
        }

        // Same output format as before: no leading zeros, but at least one digit.
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7219
7220
    /**
     * Split a string into its lines.
     *
     * "\r\n", "\r" and "\n" are each treated as exactly one line break, so blank
     * lines are kept as empty strings regardless of the line-ending style
     * (unless "$removeEmptyValues" is used). The split works on bytes and
     * therefore also handles input that is not valid UTF-8.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // One alternative per line-break style; "\r\n" must come first so that it
        // is not counted as two breaks. No "u" modifier is needed: the bytes 0x0D / 0x0A
        // never occur inside a multi-byte UTF-8 sequence.
        $lines = \preg_split('/\r\n|\r|\n/', $str);

        if ($lines === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        // Fast path: no filtering requested.
        if ($removeShortValues === null && $removeEmptyValues === false) {
            return $lines;
        }

        return self::reduce_string_array(
            $lines,
            $removeEmptyValues,
            $removeShortValues
        );
    }
7253
7254
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, i.e. the result contains
     * the words as well as the text between them, in their original order.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        $fallback = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        // Character class that defines what belongs to a word.
        $wordClass = self::rxClass($charList, '\pL');

        // A word is a run of word chars, optionally joined by dashes / apostrophes.
        $pattern = '/(' . $wordClass . '+(?:[\p{Pd}’\']' . $wordClass . '+)*)/u';

        $parts = \preg_split($pattern, $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        // Fast path: no filtering requested.
        if ($removeShortValues === null && $removeEmptyValues === false) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $removeEmptyValues,
            $removeShortValues
        );

        // Make sure every remaining entry really is a string.
        $words = [];
        foreach ($reduced as $key => $value) {
            $words[$key] = (string) $value;
        }

        return $words;
    }
7298
7299
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are forwarded unchanged and the result of UTF8::to_ascii()
     * is returned as-is.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7314
7315
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring is longer than the desired length it cannot fit; in that
     * case it is left out and the string is simply cut to "$length", so the
     * result never grows beyond the requested length because of the substring.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        // Already short enough: nothing to truncate, nothing to append.
        if ($length >= self::strlen($str, $encoding)) {
            return $str;
        }

        // Need to further trim the string so we can append the substring
        $substringLength = (int) self::strlen($substring, $encoding);
        if ($substringLength > $length) {
            // The substring does not fit. Subtracting it would yield a negative
            // length, which substr() interprets as "cut from the end" and which
            // would produce a result far longer than requested.
            $substring = '';
            $substringLength = 0;
        }

        $truncated = self::substr($str, 0, $length - $substringLength, $encoding);
        if ($truncated === false) {
            return $substring;
        }

        return $truncated . $substring;
    }
7348
7349
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * If the substring is longer than the desired length it cannot fit; in that
     * case it is left out and only the string itself is truncated.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        // Already short enough: nothing to truncate, nothing to append.
        if ($length >= self::strlen($str, $encoding)) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $substringLength = (int) self::strlen($substring, $encoding);
        if ($substringLength > $length) {
            // The substring does not fit. Subtracting it would yield a negative
            // length, which substr() interprets as "cut from the end" and which
            // would produce a result far longer than requested.
            $substring = '';
            $substringLength = 0;
        }
        $length -= $substringLength;

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // if the last word was truncated
        // (i.e. the character right behind the cut is not a space)
        $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
        if ($strPosSpace !== $length) {
            // find pos of the last occurrence of a space, get up to that
            $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

            // No space inside the truncated part but one later in the string means the
            // cut is inside the first word: (int) false === 0 then removes it completely.
            if ($lastPos !== false || $strPosSpace !== false) {
                $truncated = (string) self::substr($truncated, 0, (int) $lastPos, $encoding);
            }
        }

        return $truncated . $substring;
    }
7390
7391
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is UTF8::str_delimit() with "_" as the delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
7405
7406
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * Implemented as UTF8::str_camelize() followed by UTF8::str_upper_first().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // Only the encoding is handed to the camelize step; the remaining
        // options are applied when the first character is upper-cased.
        $camelized = self::str_camelize($str, $encoding);

        return self::str_upper_first(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
7423
7424
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are forwarded unchanged and the result of UTF8::ucfirst()
     * is returned as-is.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $upperFirst = self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $upperFirst;
    }
7441
7442
    /**
     * Counts number of words in the UTF-8 string.
     *
     * The string is split via UTF8::str_to_words(), which returns the text between
     * the words at the even indexes and the words themselves at the odd indexes.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);

        if ($format === 1) {
            // plain list of words: every odd index
            $words = [];
            foreach ($parts as $index => $part) {
                if ($index % 2 === 1) {
                    $words[] = $part;
                }
            }

            return $words;
        }

        if ($format === 2) {
            // words keyed by their character offset: the offset of a part is
            // the summed length of all parts in front of it
            $words = [];
            $offset = 0;
            foreach ($parts as $index => $part) {
                if ($index % 2 === 1) {
                    $words[$offset] = $part;
                }

                $offset += self::strlen($part);
            }

            return $words;
        }

        // n words are surrounded by n + 1 separators => (count - 1) / 2 words
        return (int) ((\count($parts) - 1) / 2);
    }
7479
7480
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * Both strings are case-folded via UTF8::strtocasefold() and the folded
     * strings are then compared with UTF8::strcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
7501
7502
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are forwarded unchanged and the result of UTF8::strstr()
     * is returned as-is.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        $found = self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );

        return $found;
    }
7519
7520
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings are reported as equal right away. Otherwise both strings
     * are brought into Unicode normalization form NFD and compared byte-wise.
     * A string that cannot be normalized (e.g. because it is not valid UTF-8) is
     * compared in its original form instead of aborting with a type error.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *              <strong>&lt; 0</strong> if str1 is less than str2<br>
     *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *              <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @noinspection PhpUndefinedClassInspection */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @noinspection PhpUndefinedClassInspection */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() does not return a string on failure; with
        // strict types that value must not reach \strcmp().
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
7539
7540
    /**
     * Find length of initial segment not matching mask.
     *
     * If "$offset" or "$length" is given, only that part of "$str" (cut via
     * UTF8::substr()) is inspected.
     *
     * @param string   $str      <p>The string to inspect.</p>
     * @param string   $charList <p>The characters that end the initial segment.</p>
     * @param int      $offset   [optional] <p>Start of the inspected part. Default: 0</p>
     * @param int|null $length   [optional] <p>Length of the inspected part or null for "up to the end".</p>
     *
     * @return int|null the length of the initial segment without any char from "$charList";
     *                  null if "$charList" is empty, the inspected (sub)string is empty
     *                  or the substring could not be extracted
     */
    public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
    {
        if ($charList === '') {
            return null;
        }

        if ($offset || $length !== null) {
            $strTmp = self::substr($str, $offset, $length);
            if ($strTmp === false) {
                return null;
            }
            $str = $strTmp;
        }

        if ($str === '') {
            return null;
        }

        // Lazily capture everything in front of the first char from the list.
        // A dedicated variable is used for the matches instead of re-using "$length".
        $matches = [];
        if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
            // strlen() may report false; the documented return type is int|null
            return (int) self::strlen($matches[1]);
        }

        // No char from the list found: the whole string is the segment.
        return (int) self::strlen($str);
    }
7574
7575
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are forwarded unchanged and the result of UTF8::stristr()
     * is returned as-is.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        $found = self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );

        return $found;
    }
7592
7593
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every entry of "$array" is converted with UTF8::chr() and the results are
     * concatenated in the given order.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        // Static method callable "Class::method" for UTF8::chr().
        $toChar = self::class . '::chr';

        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
7615
7616
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is tested against every BOM that is registered as a key in self::$BOM.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys (the BOM byte sequences) matter here, not the byte lengths.
        foreach (\array_keys(self::$BOM) as $bom) {
            $position = \strpos($str, $bom);

            // position 0 === the string begins with this BOM
            if ($position === 0) {
                return true;
            }
        }

        return false;
    }
7635
7636
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * Thin wrapper around the native strip_tags(): the input is optionally passed
     * through UTF8::clean() first, "$allowable_tags" is handed over unchanged.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str             <p>
     *                                The input string.
     *                                </p>
     * @param string $allowable_tags  [optional] <p>
     *                                You can use the optional second parameter to specify tags which should
     *                                not be stripped.
     *                                </p>
     *                                <p>
     *                                HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                can not be changed with allowable_tags.
     *                                </p>
     * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        return \strip_tags($input, $allowable_tags);
    }
7668
7669
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * Every run of characters matching "[[:space:]]" (in UTF-8 mode) is removed.
     *
     * @param string $str
     *
     * @return string the string without whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        // preg_replace() yields null on failure, which becomes an empty string here.
        return (string) $stripped;
    }
7686
7687
    /**
7688
     * Finds position of first occurrence of a string within another, case insensitive.
7689
     *
7690
     * @see http://php.net/manual/en/function.mb-stripos.php
7691
     *
7692
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
7693
     * @param string $needle    <p>The string to find in haystack.</p>
7694
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
7695
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
7696
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7697
     *
7698
     * @return false|int
7699
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
7700
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
7701
     */
7702
    public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $isDirectlyUsableEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isDirectlyUsableEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // first choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $position = \mb_stripos($haystack, $needle, $offset, $encoding);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // second choice: intl
        //
        // INFO: "grapheme_stripos()" can't handle other encodings
        //       and can't handle a negative offset
        //

        $intlIsUsable = $encoding === 'UTF-8'
                        &&
                        $offset >= 0
                        &&
                        self::$SUPPORT['intl'] === true;

        if ($intlIsUsable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitively
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
7760
7761
    /**
7762
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
7763
     *
7764
     * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
7765
     * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
7766
     * @param bool   $before_needle  [optional] <p>
7767
     *                               If <b>TRUE</b>, it returns the part of the
7768
     *                               haystack before the first occurrence of the needle (excluding the needle).
7769
     *                               </p>
7770
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
7771
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
7772
     *
7773
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
7774
     */
7775
    public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty at this point if the cleaning above
        // removed all of it. Compare strictly: a loose "!$needle" check would
        // also treat the valid needle "0" as empty and return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle) && self::is_ascii($haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // group 1 lazily captures everything in front of the first
        // case-insensitive occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // self::strlen() is documented to return false on failure,
        // but self::substr() needs an integer offset
        return self::substr($haystack, (int) self::strlen($match[1]));
    }
7839
7840
    /**
7841
     * Get the string length, not the byte-length!
7842
     *
7843
     * @see     http://php.net/manual/en/function.mb-strlen.php
7844
     *
7845
     * @param string $str       <p>The string being checked for length.</p>
7846
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
7847
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7848
     *
7849
     * @return false|int
7850
     *             The number <strong>(int)</strong> of characters in the string $str having character encoding
7851
     *             $encoding.
7852
     *             (One multi-byte character counted as +1).
7853
     *             <br>
7854
     *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
7855
     */
7856
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::strlen_in_byte($str);
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $isUtf8 = $encoding === 'UTF-8';

        if (
            !$isUtf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $length = \mb_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        //
        // fallback via intl
        //
        // INFO: "grapheme_strlen()" can't handle other encodings
        //

        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any single character"
        //

        \preg_match_all('/./us', $str, $chars);
        $length = \count($chars[0]);

        // no match at all although the string is not empty => report a failure
        return ($length === 0 && isset($str[0])) ? false : $length;
    }
7956
7957
    /**
7958
     * Get string length in byte.
7959
     *
7960
     * @param string $str
7961
     *
7962
     * @return int
7963
     */
7964
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strlen($str, 'CP850'); // 8-BIT
    }
7981
7982
    /**
7983
     * Case insensitive string comparisons using a "natural order" algorithm.
7984
     *
7985
     * INFO: natural order version of UTF8::strcasecmp()
7986
     *
7987
     * @param string $str1     <p>The first string.</p>
7988
     * @param string $str2     <p>The second string.</p>
7989
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7990
     *
7991
     * @return int
7992
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
7993
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
7994
     *             <strong>0</strong> if they are equal
7995
     */
7996
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // case-fold both operands first, then compare them in natural order
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8003
8004
    /**
8005
     * String comparisons using a "natural order" algorithm
8006
     *
8007
     * INFO: natural order version of UTF8::strcmp()
8008
     *
8009
     * @see  http://php.net/manual/en/function.strnatcmp.php
8010
     *
8011
     * @param string $str1 <p>The first string.</p>
8012
     * @param string $str2 <p>The second string.</p>
8013
     *
8014
     * @return int
8015
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8016
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
8017
     *             <strong>0</strong> if they are equal
8018
     */
8019
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical strings are equal, no folding needed
        if ($str1 === $str2) {
            return 0;
        }

        return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
    }
8023
8024
    /**
8025
     * Case-insensitive string comparison of the first n characters.
8026
     *
8027
     * @see  http://php.net/manual/en/function.strncasecmp.php
8028
     *
8029
     * @param string $str1     <p>The first string.</p>
8030
     * @param string $str2     <p>The second string.</p>
8031
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
8032
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8033
     *
8034
     * @return int
8035
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
8036
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
8037
     *             <strong>0</strong> if they are equal
8038
     */
8039
    public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
    {
        // case-fold both operands first, then compare their first $len characters
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8047
8048
    /**
8049
     * String comparison of the first n characters.
8050
     *
8051
     * @see  http://php.net/manual/en/function.strncmp.php
8052
     *
8053
     * @param string $str1 <p>The first string.</p>
8054
     * @param string $str2 <p>The second string.</p>
8055
     * @param int    $len  <p>Number of characters to use in the comparison.</p>
8056
     *
8057
     * @return int
8058
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
8059
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
8060
     *             <strong>0</strong> if they are equal
8061
     */
8062
    public static function strncmp(string $str1, string $str2, int $len): int
    {
        // cut both strings down to their first $len characters and compare the heads
        return self::strcmp(
            (string) self::substr($str1, 0, $len),
            (string) self::substr($str2, 0, $len)
        );
    }
8069
8070
    /**
8071
     * Search a string for any of a set of characters.
8072
     *
8073
     * @see  http://php.net/manual/en/function.strpbrk.php
8074
     *
8075
     * @param string $haystack  <p>The string where char_list is looked for.</p>
8076
     * @param string $char_list <p>This parameter is case sensitive.</p>
8077
     *
8078
     * @return false|string string starting from the character found, or false if it is not found
8079
     */
8080
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // pattern built from the character list via self::rxClass()
        $pattern = '/' . self::rxClass($char_list) . '/us';

        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // byte offset of the matched character inside the haystack
        $byteOffset = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $byteOffset);
    }
8092
8093
    /**
8094
     * Find position of first occurrence of string in a string.
8095
     *
8096
     * @see http://php.net/manual/en/function.mb-strpos.php
8097
     *
8098
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
8099
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
8100
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
8101
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8102
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8103
     *
8104
     * @return false|int
8105
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
8106
     *                   string.<br> If needle is not found it returns false.
8107
     */
8108
    public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strpos_in_byte($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // the guard above already guarantees a non-negative offset here
            $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Translate a negative offset (counted from the end) into the matching
        // absolute character position, so that the result stays relative to the
        // start of the original haystack, like in the branches above.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        if ($haystackIsAscii) {
            $haystackTmp = \substr($haystack, $offset);
        } else {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        }
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystackTail = (string) $haystackTmp;

        // byte position of the needle inside the remaining part of the haystack
        $pos = \strpos($haystackTail, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos === 0) {
            return $offset;
        }

        // convert the bytes in front of the match into a number of characters
        return $offset + (int) self::strlen(\substr($haystackTail, 0, $pos), $encoding);
    }
8243
8244
    /**
8245
     * Find position of first occurrence of string in a string.
8246
     *
8247
     * @param string $haystack <p>
8248
     *                         The string being checked.
8249
     *                         </p>
8250
     * @param string $needle   <p>
8251
     *                         The string to search for in haystack.
8252
     *                         </p>
8253
     * @param int    $offset   [optional] <p>
8254
     *                         The search offset. If it is not specified, 0 is used.
8255
     *                         </p>
8256
     *
8257
     * @return false|int The numeric position of the first occurrence of needle in the
8258
     *                   haystack string. If needle is not found, it returns false.
8259
     */
8260
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
8277
8278
    /**
8279
     * Finds the last occurrence of a character in a string within another.
8280
     *
8281
     * @see http://php.net/manual/en/function.mb-strrchr.php
8282
     *
8283
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
8284
     * @param string $needle        <p>The string to find in haystack</p>
8285
     * @param bool   $before_needle [optional] <p>
8286
     *                              Determines which portion of haystack
8287
     *                              this function returns.
8288
     *                              If set to true, it returns all of haystack
8289
     *                              from the beginning to the last occurrence of needle.
8290
     *                              If set to false, it returns all of haystack
8291
     *                              from the last occurrence of needle to the end,
8292
     *                              </p>
8293
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
8294
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
8295
     *
8296
     * @return false|string the portion of haystack or false if needle is not found
8297
     */
8298
    public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $isByteEncoding) {
            return \strrchr($haystack, $needle);
        }

        //
        // fallback via iconv || vanilla php
        //
        // both variants only search for the first character of the needle
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if (self::$SUPPORT['iconv'] === true) {
            $pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $pos = self::strrpos($haystack, $needle, null, $encoding);
        }

        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
8391
8392
    /**
8393
     * Reverses characters order in the string.
8394
     *
8395
     * @param string $str <p>The input string.</p>
8396
     *
8397
     * @return string the string with characters in the reverse sequence
8398
     */
8399
    public static function strrev(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // self::strlen() is documented to return false on failure; treat that
        // as "no characters" instead of decrementing a boolean and handing a
        // non-integer offset to self::substr()
        $length = (int) self::strlen($str);

        // collect the characters from the last one down to the first one
        $reversed = '';
        for ($i = $length - 1; $i >= 0; --$i) {
            $reversed .= (string) self::substr($str, $i, 1);
        }

        return $reversed;
    }
8413
8414
    /**
8415
     * Finds the last occurrence of a character in a string within another, case insensitive.
8416
     *
8417
     * @see http://php.net/manual/en/function.mb-strrichr.php
8418
     *
8419
     * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
8420
     * @param string $needle         <p>The string to find in haystack.</p>
8421
     * @param bool   $before_needle  [optional] <p>
8422
     *                               Determines which portion of haystack
8423
     *                               this function returns.
8424
     *                               If set to true, it returns all of haystack
8425
     *                               from the beginning to the last occurrence of needle.
8426
     *                               If set to false, it returns all of haystack
8427
     *                               from the last occurrence of needle to the end,
8428
     *                               </p>
8429
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
8430
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
8431
     *
8432
     * @return false|string the portion of haystack or<br>false if needle is not found
8433
     */
8434
    public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: only the first character of the needle is searched
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
8484
8485
    /**
     * Find the position of the last occurrence of a string, ignoring case.
     *
     * Backends are tried in this order: byte-wise search (CP850 / ASCII encoding), mbstring,
     * intl (UTF-8 with a non-negative offset only), byte-wise search for pure ASCII input and
     * finally case-folding both strings and delegating to UTF8::strrpos().
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for. A non-negative integer is converted via UTF8::chr().</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, or haystack / needle is empty, it returns false.
     */
    public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ('' === $haystack) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $needle = (string) ((int) $needle === $needle && $needle >= 0 ? self::chr($needle) : $needle);
        if ('' === $needle) {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // binary / ascii-only encodings: plain byte search is sufficient
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // first choice: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        // from here on only UTF-8 can be handled properly
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // second choice: intl ("grapheme_strripos()" can't handle other encodings or a negative offset)
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        // third choice: byte search, valid as long as both strings are pure ascii
        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        // last resort (vanilla php): case-fold both sides and search case-sensitively
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
8590
8591
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or one of the strings is empty
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
8626
8627
    /**
     * Find position of last occurrence of a string in a string.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string. <b>null</b> (the default) searches the whole string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            // strrpos_in_byte() only accepts an integer offset; with strict types a null
            // offset would raise a TypeError, so "no offset" is passed on as 0
            return self::strrpos_in_byte($haystack, $needle, $offset ?? 0);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // same reason as above: never hand a null offset to an int parameter
            return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $offset !== null
            &&
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (
            $offset !== null
            &&
            self::is_ascii($haystack)
            &&
            self::is_ascii($needle)
        ) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // cut the haystack down to the part that may be searched; a null offset
        // matches neither branch (null > 0 and null < 0 are both false)
        $haystackTmp = null;
        if ($offset > 0) {
            $haystackTmp = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $haystackTmp = self::substr($haystack, 0, $offset);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        $pos = self::strrpos_in_byte($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // translate the byte position into a character position and re-add the skipped prefix
        return (int) $offset + (int) self::strlen(self::substr_in_byte($haystack, 0, $pos));
    }
8762
8763
    /**
     * Byte-wise search for the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, or one of the strings is empty, it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
8797
8798
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string   $str    <p>The input string.</p>
     * @param string   $mask   <p>The mask of chars</p>
     * @param int      $offset [optional] <p>Start position inside $str (passed to UTF8::substr()).</p>
     * @param int|null $length [optional] <p>Length of the part of $str to inspect (passed to UTF8::substr()).</p>
     *
     * @return int
     *             The length of the leading segment, 0 if $str / $mask is empty or nothing matches
     */
    public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
    {
        if ($offset || $length !== null) {
            $strTmp = self::substr($str, $offset, $length);
            if ($strTmp === false) {
                $strTmp = '';
            }
            $str = (string) $strTmp;
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // use a dedicated variable for the matches instead of overwriting the subject string
        $matches = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
            return 0;
        }

        // self::strlen() may report a failure as false, the declared return type is int
        return (int) self::strlen($matches[0]);
    }
8825
8826
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * Backends are tried in this order: byte-wise search (CP850 / ASCII encoding), mbstring,
     * intl (UTF-8 only), byte-wise search for pure ASCII input and finally a regex based search.
     *
     * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle  [optional] <p>
     *                               If <b>TRUE</b>, strstr() returns the part of the
     *                               haystack before the first occurrence of the needle (excluding the needle).
     *                               </p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                       A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
    {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // binary / ascii-only encodings: plain byte search is sufficient
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // first choice: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        // from here on only UTF-8 can be handled properly
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // second choice: intl ("grapheme_strstr()" can't handle other encodings)
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($graphemeResult !== false) {
                return $graphemeResult;
            }
        }

        // third choice: byte search, valid as long as both strings are pure ascii
        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        // last resort (vanilla php): lazily capture everything in front of the first needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        return $before_needle
            ? $found[1]
            : self::substr($haystack, self::strlen($found[1]));
    }
8929
8930
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found or one of the strings is empty
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
8969
8970
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through UTF8::fixStrCaseHelper() and then converted with
     * UTF8::strtolower() (if $lower is exactly true) or UTF8::strtoupper() (otherwise).
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ('' === $str) {
            return '';
        }

        $prepared = self::fixStrCaseHelper($str, $lower, $full);

        return $lower === true
            ? self::strtolower($prepared, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($prepared, $encoding, $cleanUtf8, $lang);
    }
9008
9009
    /**
     * Make a string lowercase.
     *
     * Without $lang the conversion is done by mb_strtolower(). With $lang the intl
     * transliterator "<lang>-Lower" is used (falling back to "Any-Lower" plus a warning if that
     * id is unknown); if intl is not available a warning is triggered and mb_strtolower() is used.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all alphabetic characters converted to lowercase
     */
    public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $str = (string) $str;
        if ('' === $str) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before any conversion takes place
            $str = self::clean($str);
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // no language-specific rules requested: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \transliterator_transliterate($transliteratorId, $str);
    }
9069
9070
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and all non-spacing
     * marks (\p{Mn}) are removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     *                The folded string, or an empty string if the input could not be normalized / processed
     */
    private static function strtonatfold(string $str): string
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
        if ($decomposed === false) {
            // normalization failed (e.g. invalid UTF-8): a non-string subject must not reach preg_replace()
            return '';
        }

        // preg_replace() returns null on error, the declared return type is string
        return (string) \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
9082
9083
    /**
     * Make a string uppercase.
     *
     * Without $lang the conversion is done by mb_strtoupper(). With $lang the intl
     * transliterator "<lang>-Upper" is used (falling back to "Any-Upper" plus a warning if that
     * id is unknown); if intl is not available a warning is triggered and mb_strtoupper() is used.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all alphabetic characters converted to uppercase
     */
    public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before any conversion takes place
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, only the requested transliterator id is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                return \transliterator_transliterate($langCode, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
9143
9144
    /**
     * Translate characters or replace sub-strings.
     *
     * With $to given, $from and $to are split via UTF8::str_split(), truncated to the shorter
     * of both and used as a character map. Without $to, an array $from is used as replacement
     * map, while a string $from is simply removed from $str.
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   <p>The string being translated to.</p>
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = \INF): string
    {
        if ('' === $str) {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== \INF) {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);

            // only as many pairs as the shorter list provides
            $pairs = \min(\count($fromChars), \count($toChars));

            return \strtr(
                $str,
                \array_combine(
                    \array_slice($fromChars, 0, $pairs),
                    \array_slice($toChars, 0, $pairs)
                )
            );
        }

        return \is_string($from)
            ? \str_replace($from, '', $str)
            : \strtr($str, $from);
    }
9188
9189
    /**
     * Return the width of a string.
     *
     * mb_strwidth() is used if mbstring is available. Otherwise the string is converted to
     * UTF-8 (if needed), every character of the wide ranges listed below counts as two columns
     * and every remaining character as one.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ('' === $str) {
            return 0;
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // first choice: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strwidth($str, $encoding);
        }

        // vanilla php: the regex below works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and let preg_replace() count them
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        // wide characters take two columns, everything else one
        return ($wideCount << 1) + self::strlen($narrowOnly, 'UTF-8');
    }
9239
9240
    /**
9241
     * Get part of a string.
9242
     *
9243
     * @see http://php.net/manual/en/function.mb-substr.php
9244
     *
9245
     * @param string $str       <p>The string being checked.</p>
9246
     * @param int    $offset    <p>The first position used in str.</p>
9247
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
9248
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9249
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9250
     *
9251
     * @return false|string
9252
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
9253
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9254
     *                      characters long, <b>FALSE</b> will be returned.
9255
     */
9256
    public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
9257
    {
9258 401
        if ($str === '') {
9259 26
            return '';
9260
        }
9261
9262
        // Empty string
9263 396
        if ($length === 0) {
9264 20
            return '';
9265
        }
9266
9267 393
        if ($cleanUtf8 === true) {
9268
            // iconv and mbstring are not tolerant to invalid encoding
9269
            // further, their behaviour is inconsistent with that of PHP's substr
9270 2
            $str = self::clean($str);
9271
        }
9272
9273
        // Whole string
9274 393
        if (!$offset && $length === null) {
9275 40
            return $str;
9276
        }
9277
9278 364
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9279 161
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
9280
        }
9281
9282 364
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9283
            self::checkForSupport();
9284
        }
9285
9286
        //
9287
        // fallback for binary || ascii only
9288
        //
9289
9290
        if (
9291 364
            $encoding === 'CP850'
9292
            ||
9293 364
            $encoding === 'ASCII'
9294
        ) {
9295 2
            return self::substr_in_byte($str, $offset, $length);
9296
        }
9297
9298
        //
9299
        // fallback via mbstring
9300
        //
9301
9302 362
        if (self::$SUPPORT['mbstring'] === true) {
9303 362
            $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
9304 362
            if ($return !== false) {
9305 362
                return $return;
9306
            }
9307
        }
9308
9309
        // otherwise we need the string-length and can't fake it via "2147483647"
9310 4
        $str_length = 0;
9311 4
        if ($offset || $length === null) {
9312 4
            $str_length = self::strlen($str, $encoding);
9313
        }
9314
9315
        // e.g.: invalid chars + mbstring not installed
9316 4
        if ($str_length === false) {
9317
            return false;
9318
        }
9319
9320
        // Empty string
9321 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
9322
            return '';
9323
        }
9324
9325
        // Impossible
9326 4
        if ($offset && $offset > $str_length) {
9327
            // "false" is the php native return type here,
9328
            //  but we optimized this for performance ... see "2147483647" instead of "strlen"
9329
            return '';
9330
        }
9331
9332 4
        if ($length === null) {
9333 4
            $length = (int) $str_length;
9334
        } else {
9335 2
            $length = (int) $length;
9336
        }
9337
9338
        if (
9339 4
            $encoding !== 'UTF-8'
9340
            &&
9341 4
            self::$SUPPORT['mbstring'] === false
9342
        ) {
9343 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
9344
        }
9345
9346
        //
9347
        // fallback via intl
9348
        //
9349
9350
        if (
9351 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
9352
            &&
9353 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
9354
            &&
9355 4
            self::$SUPPORT['intl'] === true
9356
        ) {
9357
            $returnTmp = \grapheme_substr($str, $offset, $length);
9358
            if ($returnTmp !== false) {
9359
                return $returnTmp;
9360
            }
9361
        }
9362
9363
        //
9364
        // fallback via iconv
9365
        //
9366
9367
        if (
9368 4
            $length >= 0 // "iconv_substr()" can't handle negative length
9369
            &&
9370 4
            self::$SUPPORT['iconv'] === true
9371
        ) {
9372
            $returnTmp = \iconv_substr($str, $offset, $length);
9373
            if ($returnTmp !== false) {
9374
                return $returnTmp;
9375
            }
9376
        }
9377
9378
        //
9379
        // fallback for ascii only
9380
        //
9381
9382 4
        if (self::is_ascii($str)) {
9383
            return \substr($str, $offset, $length);
9384
        }
9385
9386
        //
9387
        // fallback via vanilla php
9388
        //
9389
9390
        // split to array, and remove invalid characters
9391 4
        $array = self::split($str);
9392
9393
        // extract relevant part, and join to make sting again
9394 4
        return \implode('', \array_slice($array, $offset, $length));
9395
    }
9396
9397
    /**
     * Compare a slice of $str1 with $str2, working on characters instead of bytes.
     *
     * If an offset and/or a length is given, $str1 is first reduced to that slice via
     * "UTF8::substr()" and $str2 is shortened to the same number of characters. The two
     * resulting strings are then compared via "UTF8::strcmp()" or "UTF8::strcasecmp()".
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
    {
        if ($offset !== 0 || $length !== null) {
            $str1Tmp = self::substr($str1, $offset, $length);
            $str1 = $str1Tmp === false ? '' : (string) $str1Tmp;

            // "UTF8::strlen()" can return false (see the check in "UTF8::substr()": invalid chars
            // without mbstring). Passing false as the int length would raise a TypeError under
            // strict_types, so $str2 is only shortened when the length is actually known.
            $str1Length = self::strlen($str1);
            if ($str1Length !== false) {
                $str2Tmp = self::substr($str2, 0, (int) $str1Length);
                $str2 = $str2Tmp === false ? '' : (string) $str2Tmp;
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2);
        }

        return self::strcmp($str1, $str2);
    }
9441
9442
    /**
     * Count how often $needle occurs in $haystack (or in a slice of it).
     *
     * If an offset and/or a length is given, the haystack is first reduced to that slice
     * via "UTF8::substr()". Counting is done with "mb_substr_count()" when mbstring is
     * available, otherwise with a quoted regular expression in "u" (UTF-8) mode.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack   <p>The string to search in.</p>
     * @param string $needle     <p>The substring to search for.</p>
     * @param int    $offset     [optional] <p>The offset where to start counting.</p>
     * @param int    $length     [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number of occurrences, or <strong>false</strong> if the haystack or the
     *                   needle is empty, or if the haystack length cannot be determined
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                // NOTE(review): the length is measured without passing $encoding, while the
                // slice below is taken with $encoding - confirm this is intended.
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $windowEndsBeforeStart = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
            if (
                $windowEndsBeforeStart
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $slice = self::substr($haystack, $offset, $length, $encoding);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid byte sequences are stripped from both strings before counting
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $mbstringSupport = self::$SUPPORT['mbstring'];

        if ($mbstringSupport === true) {
            return \mb_substr_count($haystack, $needle, $encoding);
        }

        if ($mbstringSupport === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // regex fallback: one entry per match because of PREG_SET_ORDER
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
9533
9534
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * With "mbstring.func_overload" active the haystack is sliced first and the count is
     * done via "mb_substr_count()" in an 8-bit encoding; otherwise the native
     * "substr_count()" is used directly.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string being found.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The offset where to start counting
     *                         </p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring. It outputs a warning if the offset plus the length is
     *                         greater than the haystack length.
     *                         </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
    {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            if ($offset || $length !== null) {
                if ($length === null) {
                    $lengthTmp = self::strlen($haystack);
                    if ($lengthTmp === false) {
                        return false;
                    }
                    $length = (int) $lengthTmp;
                }

                if (
                    (
                        $length !== 0
                        &&
                        $offset !== 0
                    )
                    &&
                    ($length + $offset) <= 0
                    &&
                    Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
                ) {
                    return false;
                }

                $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
                if ($haystackTmp === false) {
                    $haystackTmp = '';
                }
                $haystack = (string) $haystackTmp;
            }

            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        // The native function accepts NULL as length only since PHP 8.0; on older versions an
        // explicit NULL is rejected under strict_types, so the argument is left out when no
        // length was requested.
        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
9607
9608
    /**
     * Count the occurrences of $substring in $str, optionally ignoring case.
     *
     * For the case-insensitive mode both strings are case-folded via
     * "UTF8::strtocasefold()" before they are passed to "UTF8::substr_count()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
    {
        if ($substring === '' || $str === '') {
            return 0;
        }

        // backward compatibility: a non-boolean third argument is used as the encoding
        if (\is_bool($caseSensitive) === false) {
            $encoding = (string) $caseSensitive;
        }

        $ignoreCase = !$caseSensitive;
        if ($ignoreCase) {
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

        // "UTF8::substr_count()" may return false, which becomes 0 here
        return (int) $occurrences;
    }
9638
9639
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * The haystack is returned unchanged if it does not start with the needle, if the
     * needle is empty, or if the length of the needle cannot be determined.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // "UTF8::strlen()" can return false; passing that as the int offset would raise a
        // TypeError under strict_types, so the haystack is left untouched in that case.
        $needleLength = self::strlen($needle);
        if ($needleLength === false) {
            return $haystack;
        }

        $remainder = self::substr($haystack, (int) $needleLength);

        return $remainder === false ? '' : (string) $remainder;
    }
9667
9668
    /**
     * Get a part of a string, addressed in bytes instead of characters.
     *
     * Uses "mb_substr()" with an 8-bit encoding when "mbstring.func_overload" is active,
     * otherwise the native "substr()".
     *
     * @param string $str    <p>The string being checked.</p>
     * @param int    $offset <p>The first position used in str.</p>
     * @param int    $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> may be returned (behaviour of the underlying
     *                      native function, depends on the PHP version).
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to cut: empty input or an explicit zero length
        if ($str === '' || $length === 0) {
            return '';
        }

        // no offset and no length: the whole string is requested
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // a missing length is replaced by a very large value ("up to the end")
        $maxBytes = $length ?? 2147483647;

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $maxBytes, 'CP850'); // 8-BIT
        }

        return \substr($str, $offset, $maxBytes);
    }
9707
9708
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * The haystack is returned unchanged if it does not end with the needle, if the
     * needle is empty, or if one of the string lengths cannot be determined.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // "UTF8::strlen()" can return false. "false - false" would silently evaluate to 0 and
        // cut the haystack down to an empty string, so bail out with the original value instead.
        $haystackLength = self::strlen($haystack);
        $needleLength = self::strlen($needle);
        if ($haystackLength === false || $needleLength === false) {
            return $haystack;
        }

        $head = self::substr($haystack, 0, (int) $haystackLength - (int) $needleLength);

        return $head === false ? '' : (string) $head;
    }
9736
9737
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack).
     *
     * The haystack is returned unchanged if it does not start with the needle, if the
     * needle is empty, or if the length of the needle cannot be determined.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // "UTF8::strlen()" can return false; passing that as the int offset would raise a
        // TypeError under strict_types, so the haystack is left untouched in that case.
        $needleLength = self::strlen($needle);
        if ($needleLength === false) {
            return $haystack;
        }

        $remainder = self::substr($haystack, (int) $needleLength);

        return $remainder === false ? '' : (string) $remainder;
    }
9765
9766
    /**
     * Replace text within a portion of a string, working on characters instead of bytes.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * For array input the replacement, offset and length are expanded to one value per
     * input string and this method is called again for every element.
     *
     * NOTE(review): for array input a missing length (NULL) is turned into 0, i.e. the
     * replacement is inserted at the offset; for a single string a missing length replaces
     * up to the end of the string. This difference is kept as-is for backward compatibility.
     *
     * @param string|string[] $str              <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset           <p>
     *                                          If start is positive, the replacing will begin at the start'th offset
     *                                          into string.
     *                                          <br><br>
     *                                          If start is negative, the replacing will begin at the start'th character
     *                                          from the end of string.
     *                                          </p>
     * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
     *                                          portion of string which is to be replaced. If it is negative, it
     *                                          represents the number of characters from the end of string at which to
     *                                          stop replacing. If it is not given, then it will default to strlen(
     *                                          string ); i.e. end the replacing at the end of string. Of course, if
     *                                          length is zero then this function will have the effect of inserting
     *                                          replacement into string at the given start offset.</p>
     * @param string          $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
    {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    // non-integer offsets are replaced by 0
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    if ($valueTmpV2 !== null) {
                        // non-integer lengths are replaced by the number of input strings
                        $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                    } else {
                        $valueTmpV2 = 0;
                    }
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call; the encoding is forwarded for every element, otherwise
            // array input would always be processed with the default encoding
            return \array_map(
                [self::class, 'substr_replace'],
                $str,
                $replacement,
                $offset,
                $length,
                \array_fill(0, $num, $encoding)
            );
        }

        if (\is_array($replacement) === true) {
            // a single input string only uses the first replacement
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if ($str === '') {
            return $replacement;
        }

        if (self::is_ascii($str)) {
            // byte positions equal character positions for ASCII-only input
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = self::strlen($str, $encoding);
            if ($string_length === false) {
                // unknown length: same result as in the non-mbstring path below
                return '';
            }

            // clamp the offset into [0, string length]
            if ($offset < 0) {
                $offset = \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing or too large one means "to the end"
            if ($length < 0) {
                $length = \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part before the offset + replacement + part after the replaced range
            $head = self::substr($str, 0, $offset, $encoding);
            $tail = self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);

            return (string) $head . $replacement . (string) $tail;
        }

        // fallback without mbstring: split both strings into characters and splice the arrays
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
9899
9900
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack).
     *
     * The haystack is returned unchanged if it does not end with the needle, if the
     * needle is empty, or if one of the string lengths cannot be determined.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_right(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_ends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // "UTF8::strlen()" can return false. "false - false" would silently evaluate to 0 and
        // cut the haystack down to an empty string, so bail out with the original value instead.
        $haystackLength = self::strlen($haystack);
        $needleLength = self::strlen($needle);
        if ($haystackLength === false || $needleLength === false) {
            return $haystack;
        }

        $head = self::substr($haystack, 0, (int) $haystackLength - (int) $needleLength);

        return $head === false ? '' : (string) $head;
    }
9928
9929
    /**
     * Returns a case swapped version of the string.
     *
     * The result is built by XOR-ing the lower-cased string, the upper-cased string and
     * the original string: where the original equals one case variant, the other variant
     * remains.
     *
     * NOTE(review): a string XOR works byte by byte, so this presumably relies on both case
     * variants having the same byte length as the input - confirm for characters whose
     * case mapping changes the length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before the case variants are built
            $str = self::clean($str);
        }

        $lowerCased = self::strtolower($str, $encoding);
        $upperCased = self::strtoupper($str, $encoding);

        $swapped = $lowerCased ^ $upperCased ^ $str;

        return (string) $swapped;
    }
9956
9957
    /**
     * Checks whether a userland polyfill seems to stand in for "mbstring" or "iconv".
     *
     * A polyfill is assumed when the extension itself is not loaded but one of its
     * functions ("mb_strlen" / "iconv") exists anyway.
     *
     * @return bool
     *              <strong>true</strong> if a polyfill was detected for at least one of the two
     *              extensions, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringPolyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconvPolyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstringPolyfilled || $iconvPolyfilled;
    }
9980
9981
    /**
     * Replace every tab character in the string by a run of spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @return string the string with all tabs replaced
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $spaces, $str);
    }
9991
9992
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * The encoding is normalized first (unless it already is "UTF-8" or "CP850"), the
     * actual work is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $alreadyNormalized = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$alreadyNormalized) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return self::str_titleize(
            $str,
            null,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength,
            false
        );
    }
10012
10013
    /**
     * Deprecated camelCase alias that forwards all arguments to "UTF8::to_ascii()".
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr <p>Passed on as the "unknown" replacement character.</p>
     * @param bool   $strict    <p>Passed on as the "strict" flag.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
10030
10031
    /**
     * Deprecated camelCase alias that forwards its argument to "UTF8::to_iso8859()".
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
10046
10047
    /**
     * Deprecated camelCase alias that forwards its argument to "UTF8::to_latin1()".
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
10062
10063
    /**
     * Deprecated camelCase alias that forwards its argument to "UTF8::to_utf8()".
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
10078
10079
    /**
     * Convert a string into ASCII.
     *
     * Steps: return early for empty / pure ASCII input, clean the string via "UTF8::clean()",
     * optionally try "transliterator_transliterate()" (strict mode + intl support), and finally
     * replace every remaining non-ASCII character via the lookup tables loaded with "getData()".
     * Characters without a table entry are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // cache of the loaded replacement tables, indexed by "bank" (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if ($strict === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

                // "transliterator_transliterate()" returns false on failure: keep the cleaned string
                // in that case instead of passing a boolean on to the string-typed helpers
                if (\is_string($transliterated)) {
                    $str = $transliterated;

                    // check again, if we only have ASCII, now ...
                    if (self::is_ascii($str) === true) {
                        return $str;
                    }
                }
            }
        }

        if (self::$ORD === null) {
            // NOTE(review): "getData()" is reported to be able to return false - confirm how a
            // missing "ord" table should be handled
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];

        foreach ($chars as &$c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // Decode the code point from the lead byte and only as many continuation bytes as the
            // lead byte announces. $ord is reset for every character, so a lead byte that cannot
            // start a sequence (128-191, 254, 255) never reuses the previous character's value.
            $ord = null;

            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ordC1 = self::$ORD[$c[1]];

                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
                $ordC1 = self::$ORD[$c[1]];
                $ordC2 = self::$ORD[$c[2]];

                $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
            } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
                $ordC1 = self::$ORD[$c[1]];
                $ordC2 = self::$ORD[$c[2]];
                $ordC3 = self::$ORD[$c[3]];

                $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
            } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
                $ordC1 = self::$ORD[$c[1]];
                $ordC2 = self::$ORD[$c[2]];
                $ordC3 = self::$ORD[$c[3]];
                $ordC4 = self::$ORD[$c[4]];

                $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
                $ordC1 = self::$ORD[$c[1]];
                $ordC2 = self::$ORD[$c[2]];
                $ordC3 = self::$ORD[$c[3]];
                $ordC4 = self::$ORD[$c[4]];
                $ordC5 = self::$ORD[$c[5]];

                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
            }

            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // the replacement tables are split into banks of 256 code points (file "x" + hex bank number)
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember a missing bank as empty table, so it is not loaded again
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            // no table entry => use the replacement for unknown characters
            $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }

        // break the reference, so later writes to $c cannot touch the last array element
        unset($c);

        return \implode('', $chars);
    }
10239
10240
    /**
     * Interprets a value as boolean.
     *
     * The value is cast to string first. The (case-insensitive) keywords "true", "1", "on", "yes"
     * give true and "false", "0", "off", "no" give false. Any other numeric string is true if it
     * is greater than zero. Everything else is true if something is left after "UTF8::trim()".
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keyword = \strtolower($value);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        return (bool) self::trim($value);
    }
10279
10280
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Only ".", "-", a-z, A-Z, 0-9, whitespace and the characters of $fallback_char are kept, all
     * other characters are removed. Whitespace runs are then replaced by $fallback_char, runs of
     * $fallback_char are collapsed and $fallback_char is trimmed from both ends.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate [optional] <p>No transliteration is done by default. If true, the string
     *                                  is passed through "UTF8::str_transliterate()" first.</p>
     * @param string $fallback_char     [optional] <p>Replacement for whitespace. An empty string removes the
     *                                  whitespace.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                            // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // 3) remove double $fallback_char's
        // An empty $fallback_char would produce the invalid pattern "/[]+/", "preg_replace()" would
        // then return null and the whole result would be lost - there is nothing to collapse anyway.
        if ($fallback_char_escaped !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/';
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
10315
10316
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept), every other value is
     * cast to string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            return \array_map(
                static function ($item) {
                    return self::to_iso8859($item);
                },
                $str
            );
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
10340
10341
    /**
     * Alias that forwards its argument to "UTF8::to_iso8859()".
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
10354
10355
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays are converted element by element, keys are kept
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $length = self::strlen_in_byte($str);
        $result = '';
        $i = 0;

        while ($i < $length) {
            $lead = $str[$i];

            if ($lead < "\xC0") {
                // a lone continuation byte (10xxxxxx) needs conversion, everything else is kept as it is
                $result .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert_helper($lead) : $lead;
                ++$i;

                continue;
            }

            // number of continuation bytes the lead byte announces (0 === doesn't look like UTF8)
            if ($lead <= "\xDF") {
                $trailing = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $trailing = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $trailing = 3; // looks like 4 bytes UTF8
            } else {
                $trailing = 0;
            }

            if ($trailing > 0) {
                $sequence = $lead;

                for ($offset = 1; $offset <= $trailing; ++$offset) {
                    // a missing byte at the end of the string counts as "\x00" and so as invalid
                    $next = $i + $offset >= $length ? "\x00" : $str[$i + $offset];

                    if ($next < "\x80" || $next > "\xBF") {
                        $sequence = null;

                        break;
                    }

                    $sequence .= $next;
                }

                if ($sequence !== null) {
                    // yeah, almost sure it's UTF8 already
                    $result .= $sequence;
                    $i += $trailing + 1;

                    continue;
                }
            }

            // not valid UTF8 - convert the lead byte only, the following bytes are checked on their own
            $result .= self::to_utf8_convert_helper($lead);
            ++$i;
        }

        // decode unicode escape sequences
        $result = \preg_replace_callback(
            '/\\\\u([0-9a-f]{4})/i',
            static function ($match) {
                // always fallback via symfony polyfill
                return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
            },
            $result
        );

        // decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
10459
10460
    /**
     * Converts one byte that is not part of a valid UTF-8 sequence into its UTF-8 representation.
     *
     * The byte is looked up in the "win1252_to_utf8" table first. Without a table entry it is
     * encoded as a two-byte sequence built from its upper two and lower six bits.
     *
     * @param int|string $input <p>The byte to convert (the callers in this class pass a single-byte string).</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // lazy-load the lookup tables
        // NOTE(review): "getData()" is reported to be able to return false - confirm how missing
        // tables should be handled
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Integer shift instead of "$ordC1 / 64": the division yields a float for most values and
        // a float with a fractional part is deprecated as array key since PHP 8.1.
        // The "|" and "&" below are intentional bitwise operations on single-byte strings.
        $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
10493
10494
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars [optional] <p>Optional characters to be stripped. Without it (or with an empty
     *                      value) the unicode categories "Z" (separators) and "C" (control chars etc.) are
     *                      stripped.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is falsy in PHP, but it is a legitimate char list - e.g. for stripping zeros
        $useDefaultChars = $chars === \INF || ($chars !== '0' && !$chars);

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultChars) {
            $pattern = '^[\pZ\pC]+|[\pZ\pC]+$';
        } else {
            $chars = \preg_quote($chars, '/');
            // "{$chars}" instead of "${chars}": the latter interpolation syntax is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
10523
10524
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the rest, only the first character is
     * passed to "UTF8::strtoupper()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // everything after the first character ("substr()" may return false, which is handled as empty rest)
        $rest = self::substr($str, 1, null, $encoding);
        $rest = $rest === false ? '' : $rest;

        $firstChar = (string) self::substr($str, 0, 1, $encoding);
        $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $firstCharUpper . $rest;
    }
10558
10559
    /**
     * Alias that forwards its arguments to "UTF8::ucfirst()" (only the first char of the string
     * is uppercased).
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
10574
10575
    /**
     * Uppercase for all words in the string.
     *
     * Pure ASCII input without exceptions and without extra word chars is passed to the native
     * "ucwords()". Otherwise the string is split via "UTF8::str_to_words()" and every part that is
     * not listed in $exceptions is passed to "UTF8::ucfirst()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // strict comparison: the string "0" is falsy, but it is not empty
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // the native function is only usable without extra word chars and without exceptions
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;
        $newWords = [];

        foreach ($words as $word) {
            // Skip empty parts only: the parts are glued together without separator at the end, so
            // dropping a "0" part here would remove it from the result.
            if ((string) $word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }

            $newWords[] = $word;
        }

        return \implode('', $newWords);
    }
10643
10644
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // rewrite "%uXXXX" escapes as hexadecimal html-entities, so the entity decoding below handles them
        $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscape, $str)) {
            $str = (string) \preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // one pass: to UTF-8 -> decode html-entities -> urldecode -> fix broken UTF-8;
        // with $multi_decode the pass is repeated until the string does not change anymore
        do {
            $previous = $str;

            $utf8 = self::to_utf8($str);
            $withoutEntities = self::html_entity_decode($utf8, $flags);
            $str = self::fix_simple_utf8(\urldecode($withoutEntities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
10691
10692
    /**
10693
     * Return a array with "urlencoded"-win1252 -> UTF-8
10694
     *
10695
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
10696
     *
10697
     * @return string[]
10698
     */
10699
    public static function urldecode_fix_win1252_chars(): array
10700
    {
10701
        return [
10702 2
            '%20' => ' ',
10703
            '%21' => '!',
10704
            '%22' => '"',
10705
            '%23' => '#',
10706
            '%24' => '$',
10707
            '%25' => '%',
10708
            '%26' => '&',
10709
            '%27' => "'",
10710
            '%28' => '(',
10711
            '%29' => ')',
10712
            '%2A' => '*',
10713
            '%2B' => '+',
10714
            '%2C' => ',',
10715
            '%2D' => '-',
10716
            '%2E' => '.',
10717
            '%2F' => '/',
10718
            '%30' => '0',
10719
            '%31' => '1',
10720
            '%32' => '2',
10721
            '%33' => '3',
10722
            '%34' => '4',
10723
            '%35' => '5',
10724
            '%36' => '6',
10725
            '%37' => '7',
10726
            '%38' => '8',
10727
            '%39' => '9',
10728
            '%3A' => ':',
10729
            '%3B' => ';',
10730
            '%3C' => '<',
10731
            '%3D' => '=',
10732
            '%3E' => '>',
10733
            '%3F' => '?',
10734
            '%40' => '@',
10735
            '%41' => 'A',
10736
            '%42' => 'B',
10737
            '%43' => 'C',
10738
            '%44' => 'D',
10739
            '%45' => 'E',
10740
            '%46' => 'F',
10741
            '%47' => 'G',
10742
            '%48' => 'H',
10743
            '%49' => 'I',
10744
            '%4A' => 'J',
10745
            '%4B' => 'K',
10746
            '%4C' => 'L',
10747
            '%4D' => 'M',
10748
            '%4E' => 'N',
10749
            '%4F' => 'O',
10750
            '%50' => 'P',
10751
            '%51' => 'Q',
10752
            '%52' => 'R',
10753
            '%53' => 'S',
10754
            '%54' => 'T',
10755
            '%55' => 'U',
10756
            '%56' => 'V',
10757
            '%57' => 'W',
10758
            '%58' => 'X',
10759
            '%59' => 'Y',
10760
            '%5A' => 'Z',
10761
            '%5B' => '[',
10762
            '%5C' => '\\',
10763
            '%5D' => ']',
10764
            '%5E' => '^',
10765
            '%5F' => '_',
10766
            '%60' => '`',
10767
            '%61' => 'a',
10768
            '%62' => 'b',
10769
            '%63' => 'c',
10770
            '%64' => 'd',
10771
            '%65' => 'e',
10772
            '%66' => 'f',
10773
            '%67' => 'g',
10774
            '%68' => 'h',
10775
            '%69' => 'i',
10776
            '%6A' => 'j',
10777
            '%6B' => 'k',
10778
            '%6C' => 'l',
10779
            '%6D' => 'm',
10780
            '%6E' => 'n',
10781
            '%6F' => 'o',
10782
            '%70' => 'p',
10783
            '%71' => 'q',
10784
            '%72' => 'r',
10785
            '%73' => 's',
10786
            '%74' => 't',
10787
            '%75' => 'u',
10788
            '%76' => 'v',
10789
            '%77' => 'w',
10790
            '%78' => 'x',
10791
            '%79' => 'y',
10792
            '%7A' => 'z',
10793
            '%7B' => '{',
10794
            '%7C' => '|',
10795
            '%7D' => '}',
10796
            '%7E' => '~',
10797
            '%7F' => '',
10798
            '%80' => '`',
10799
            '%81' => '',
10800
            '%82' => '‚',
10801
            '%83' => 'ƒ',
10802
            '%84' => '„',
10803
            '%85' => '…',
10804
            '%86' => '†',
10805
            '%87' => '‡',
10806
            '%88' => 'ˆ',
10807
            '%89' => '‰',
10808
            '%8A' => 'Š',
10809
            '%8B' => '‹',
10810
            '%8C' => 'Œ',
10811
            '%8D' => '',
10812
            '%8E' => 'Ž',
10813
            '%8F' => '',
10814
            '%90' => '',
10815
            '%91' => '‘',
10816
            '%92' => '’',
10817
            '%93' => '“',
10818
            '%94' => '”',
10819
            '%95' => '•',
10820
            '%96' => '–',
10821
            '%97' => '—',
10822
            '%98' => '˜',
10823
            '%99' => '™',
10824
            '%9A' => 'š',
10825
            '%9B' => '›',
10826
            '%9C' => 'œ',
10827
            '%9D' => '',
10828
            '%9E' => 'ž',
10829
            '%9F' => 'Ÿ',
10830
            '%A0' => '',
10831
            '%A1' => '¡',
10832
            '%A2' => '¢',
10833
            '%A3' => '£',
10834
            '%A4' => '¤',
10835
            '%A5' => '¥',
10836
            '%A6' => '¦',
10837
            '%A7' => '§',
10838
            '%A8' => '¨',
10839
            '%A9' => '©',
10840
            '%AA' => 'ª',
10841
            '%AB' => '«',
10842
            '%AC' => '¬',
10843
            '%AD' => '',
10844
            '%AE' => '®',
10845
            '%AF' => '¯',
10846
            '%B0' => '°',
10847
            '%B1' => '±',
10848
            '%B2' => '²',
10849
            '%B3' => '³',
10850
            '%B4' => '´',
10851
            '%B5' => 'µ',
10852
            '%B6' => '¶',
10853
            '%B7' => '·',
10854
            '%B8' => '¸',
10855
            '%B9' => '¹',
10856
            '%BA' => 'º',
10857
            '%BB' => '»',
10858
            '%BC' => '¼',
10859
            '%BD' => '½',
10860
            '%BE' => '¾',
10861
            '%BF' => '¿',
10862
            '%C0' => 'À',
10863
            '%C1' => 'Á',
10864
            '%C2' => 'Â',
10865
            '%C3' => 'Ã',
10866
            '%C4' => 'Ä',
10867
            '%C5' => 'Å',
10868
            '%C6' => 'Æ',
10869
            '%C7' => 'Ç',
10870
            '%C8' => 'È',
10871
            '%C9' => 'É',
10872
            '%CA' => 'Ê',
10873
            '%CB' => 'Ë',
10874
            '%CC' => 'Ì',
10875
            '%CD' => 'Í',
10876
            '%CE' => 'Î',
10877
            '%CF' => 'Ï',
10878
            '%D0' => 'Ð',
10879
            '%D1' => 'Ñ',
10880
            '%D2' => 'Ò',
10881
            '%D3' => 'Ó',
10882
            '%D4' => 'Ô',
10883
            '%D5' => 'Õ',
10884
            '%D6' => 'Ö',
10885
            '%D7' => '×',
10886
            '%D8' => 'Ø',
10887
            '%D9' => 'Ù',
10888
            '%DA' => 'Ú',
10889
            '%DB' => 'Û',
10890
            '%DC' => 'Ü',
10891
            '%DD' => 'Ý',
10892
            '%DE' => 'Þ',
10893
            '%DF' => 'ß',
10894
            '%E0' => 'à',
10895
            '%E1' => 'á',
10896
            '%E2' => 'â',
10897
            '%E3' => 'ã',
10898
            '%E4' => 'ä',
10899
            '%E5' => 'å',
10900
            '%E6' => 'æ',
10901
            '%E7' => 'ç',
10902
            '%E8' => 'è',
10903
            '%E9' => 'é',
10904
            '%EA' => 'ê',
10905
            '%EB' => 'ë',
10906
            '%EC' => 'ì',
10907
            '%ED' => 'í',
10908
            '%EE' => 'î',
10909
            '%EF' => 'ï',
10910
            '%F0' => 'ð',
10911
            '%F1' => 'ñ',
10912
            '%F2' => 'ò',
10913
            '%F3' => 'ó',
10914
            '%F4' => 'ô',
10915
            '%F5' => 'õ',
10916
            '%F6' => 'ö',
10917
            '%F7' => '÷',
10918
            '%F8' => 'ø',
10919
            '%F9' => 'ù',
10920
            '%FA' => 'ú',
10921
            '%FB' => 'û',
10922
            '%FC' => 'ü',
10923
            '%FD' => 'ý',
10924
            '%FE' => 'þ',
10925
            '%FF' => 'ÿ',
10926
        ];
10927
    }
10928
10929
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is first passed through a str_replace() built from the keys and
     * values of the "win1252_to_utf8" data table and is then decoded byte by byte:
     * - 2-byte sequences become a single byte when their code point is below 256,
     *   otherwise they become "?"
     * - 3-byte and 4-byte sequences become "?"
     * - a 2-byte lead byte at the very end of the input (truncated sequence) becomes "?"
     * - every other byte is copied unchanged
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the string as it was right before the byte-wise decoding
     *                              is returned whenever the decoded result does not contain fewer
     *                              characters (measured via self::strlen()) than that string.
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // search / replace lists are built once per request and then re-used
        static $UTF8_TO_WIN1252_KEYS_CACHE = null;
        static $UTF8_TO_WIN1252_VALUES_CACHE = null;

        if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        /** @noinspection PhpInternalEntityUsedInspection */
        $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // save for later comparison
        $str_backup = $str;
        $len = self::strlen_in_byte($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';

        // $i is the read position, $j the write position: the string is decoded in place
        // and cut down to $j bytes afterwards ($j never runs ahead of $i).
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte selects the sequence length
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence at the end of the input:
                        // there is no continuation byte to read, so do not touch $str[$i + 1]
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than for a 3-byte sequence
                    ++$i;
                // no break
                case "\xE0":
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        $return = self::substr_in_byte($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
11015
11016
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * After the plain \utf8_encode() call, every key of the "win1252_to_utf8" data table
     * that occurs in the result is replaced by the corresponding table value.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>An empty string is returned for empty input or if \utf8_encode() returns false.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        if ($encoded === false) {
            return '';
        }

        // without a "\xC2" byte the table replacement below is skipped
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        // search / replace lists are built once per request and then re-used
        static $searchCache = null;
        static $replaceCache = null;

        if ($searchCache === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
            $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($searchCache, $replaceCache, $encoded);
    }
11055
11056
    /**
     * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
11069
11070
    /**
     * Returns the class-internal table of UTF-8 whitespace characters.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E.
     *
     * @return string[]
     *                 An array with all known whitespace characters as values and the type of whitespace as keys
     *                 as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
11085
11086
    /**
     * Limit the number of words in a string.
     *
     * The string is returned unchanged when it has no more than $limit words
     * (or when the word pattern does not match at all); otherwise the first
     * $limit words are kept, trailing whitespace is trimmed and $strAddOn is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer; values below 1 yield an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // optional leading whitespace, followed by 1..$limit "word + trailing whitespace" groups
        $wordsRegex = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($wordsRegex, $str, $matches);

        if (!isset($matches[0])) {
            return $str;
        }

        $head = $matches[0];

        // the match already covers the whole string: nothing was cut off
        if (self::strlen($str) === self::strlen($head)) {
            return $str;
        }

        return self::rtrim($head) . $strAddOn;
    }
11117
11118
    /**
     * Wraps a string to a given number of characters
     *
     * The wrapping positions are computed by the native \wordwrap() on an
     * ASCII mask of the input (one mask byte per character returned by self::split()),
     * and are then applied to the real characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string the given string wrapped at the specified column;
     *                an empty string if $str or $break is empty
     */
    public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
    {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            $segments = [];
        }

        // $chars holds the real characters (an existing break counts as one entry),
        // $mask holds one ASCII byte per entry: '#' = break, ' ' = space, '?' = anything else
        $chars = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex !== 0) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $charIndex = 0;
        $maskPos = -1;
        $breakPos = -1;

        while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {
            // copy all characters in front of the current break position
            ++$maskPos;
            while ($maskPos < $breakPos) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex]);
                ++$charIndex;
                ++$maskPos;
            }

            // a break that replaces an existing break or a space consumes that character
            if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
                unset($chars[$charIndex]);
                ++$charIndex;
            }

            $wrapped .= $break;
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
11187
11188
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split at "\r\n", "\r" and "\n"; every resulting line is wrapped
     * separately via self::wordwrap(), so that the width is counted in characters
     * and not in bytes, and is terminated with "\n".
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width that is used for every line.</p>
     *
     * @return string <p>The wrapped lines, each one followed by "\n"; an empty string if the split fails.</p>
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            // the native \wordwrap() counts bytes and would wrap multi-byte text too early
            $wrapped .= self::wordwrap($line, $limit);
            $wrapped .= "\n";
        }

        return $wrapped;
    }
11212
11213
    /**
     * Returns the class-internal list of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
11222
}
11223