Passed
Push — master ( 5912c0...6df748 )
by Lars
04:36
created

UTF8   F

Complexity

Total Complexity 1782

Size/Duplication

Total Lines 14804
Duplicated Lines 0 %

Test Coverage

Coverage 81.35%

Importance

Changes 106
Bugs 53 Features 6
Metric Value
eloc 4536
dl 0
loc 14804
ccs 3263
cts 4011
cp 0.8135
rs 0.8
c 106
b 53
f 6
wmc 1782

310 Methods

Rating   Name   Duplication   Size   Complexity  
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 23 5
A __construct() 0 2 1
B between() 0 48 8
A char_at() 0 7 2
A chars() 0 4 1
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
B chr_to_decimal() 0 38 8
A file_has_bom() 0 8 2
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A filter_input() 0 16 3
A get_unique_string() 0 22 3
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A encode_mimeheader() 0 26 5
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
A has_uppercase() 0 8 2
A remove_left() 0 28 4
A max_chr_width() 0 8 2
A isBinary() 0 3 1
A ltrim() 0 27 5
A emoji_decode() 0 18 2
A is_utf8() 0 13 4
A remove_html() 0 3 1
A lcword() 0 13 1
A mbstring_loaded() 0 3 1
D chr() 0 109 19
A html_escape() 0 6 1
D normalize_encoding() 0 147 16
B get_file_type() 0 65 7
A chr_to_int() 0 3 1
C is_utf16() 0 71 16
A isHtml() 0 3 1
C filter() 0 57 12
A normalize_whitespace() 0 9 1
A isBase64() 0 3 1
A is_html() 0 14 2
A decode_mimeheader() 0 8 3
A html_decode() 0 6 1
A isUtf32() 0 3 1
A rtrim() 0 27 5
A regex_replace() 0 20 3
A chunk_split() 0 3 1
A replace_all() 0 11 2
A removeBOM() 0 3 1
A emoji_encode() 0 18 2
A is_alpha() 0 8 2
B get_random_string() 0 56 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A css_identifier() 0 56 6
A isUtf8() 0 3 1
A css_stripe_media_queries() 0 6 1
A clean() 0 48 6
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
D range() 0 72 23
B rawurldecode() 0 51 8
A normalize_msword() 0 3 1
A spaces_to_tabs() 0 11 3
A is_blank() 0 8 2
D getCharDirection() 0 105 118
A htmlspecialchars() 0 15 3
A replace() 0 11 2
A filter_var_array() 0 15 2
A decimal_to_chr() 0 3 1
A has_whitespace() 0 8 2
A pcre_utf8_support() 0 4 1
A codepoints() 0 36 5
A lowerCaseFirst() 0 13 1
A chr_map() 0 5 1
A cleanup() 0 24 2
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 17 3
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 18 4
B is_binary() 0 38 9
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
B is_url() 0 44 7
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 17 3
A remove_bom() 0 22 5
F extract_text() 0 175 34
A json_loaded() 0 3 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A hasBom() 0 3 1
A str_capitalize_name() 0 8 1
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A filter_var() 0 15 2
A is_empty() 0 3 1
B html_encode() 0 54 11
A isUtf16() 0 3 1
F encode() 0 147 37
C is_utf32() 0 71 16
C ord() 0 77 16
A is_alphanumeric() 0 8 2
A json_decode() 0 14 2
A fix_simple_utf8() 0 32 4
A checkForSupport() 0 48 4
A is_printable() 0 3 1
B is_json() 0 27 8
A int_to_hex() 0 7 2
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A is_base64() 0 17 5
A hex_to_int() 0 14 3
A htmlentities() 0 28 3
A hex_to_chr() 0 4 1
A isJson() 0 3 1
A filter_input_array() 0 15 3
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 43 5
A chr_to_hex() 0 11 3
A min() 0 14 3
A is_punctuation() 0 3 1
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 59 13
A split() 0 7 1
A remove_duplicates() 0 16 4
B file_get_contents() 0 56 11
A emoji_from_country_code() 0 17 3
A str_substr_after_first_separator() 0 28 6
A str_contains() 0 14 3
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
C utf8_decode() 0 61 13
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 72 15
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A str_limit() 0 26 6
A toUTF8() 0 3 1
B str_obfuscate() 0 47 8
A string() 0 16 4
B rxClass() 0 45 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 171 7
A str_starts_with() 0 15 4
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strchr() 0 13 1
A strichr() 0 13 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
C str_longest_common_substring() 0 76 16
A titlecase() 0 35 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 60 10
B urldecode() 0 51 8
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
D substr_replace() 0 124 27
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
A str_replace_first() 0 20 2
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A trim() 0 27 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 31 5
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 42 11
B str_contains_all() 0 24 9
A str_isubstr_after_last_separator() 0 26 5
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 16 3
C str_detect_encoding() 0 111 13
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 138 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 136 31
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 15 4
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 15 3
A to_utf8() 0 14 3
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
A str_ibegins() 0 3 1
B str_limit_after_word() 0 53 11
A str_upper_first() 0 13 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
B to_string() 0 33 8
A strtonatfold() 0 13 2
C strcspn() 0 49 12
A fixStrCaseHelper() 0 41 5
B str_split_pattern() 0 49 11
D strstr() 0 107 21
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 138 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 60 10
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 29 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
     * Bom => Byte-Length
     *
     * Every known byte order mark appears twice: once as its raw byte
     * sequence and once in the form it takes when those bytes were read as
     * "WINDOWS-1252" and stored again as UTF-8.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // The three characters U+00EF U+00BB U+00BF, written as escaped UTF-8 bytes so that
        // editors and tools cannot silently strip them (an empty key could never match).
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two blank characters in the next key (and in the LE variant below)
        // presumably stand for the two NUL bytes of the BOM - confirm against the upstream file.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @phpstan-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @phpstan-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @phpstan-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @phpstan-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @phpstan-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @phpstan-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @phpstan-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @phpstan-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @phpstan-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @phpstan-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @phpstan-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @phpstan-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
247
     * No-op constructor: it takes no arguments and runs no initialization code.
248
     */
249 34
    public function __construct()
250
    {
251 34
    }
252
253
    /**
     * Fetch a single character by its position, similar to $str[1] on a byte string.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string for an empty
     *                input or a negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing to look up in an empty string, and negative positions are rejected
        if ($pos < 0 || $str === '') {
            return '';
        }

        // the default encoding goes straight to mbstring, everything else to our own substr()
        $character = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $character;
    }
279
280
    /**
     * Make sure the string starts with a byte order mark.
     *
     * INFO: A string for which UTF8::string_has_bom() already reports a BOM is
     *       returned unchanged, otherwise the UTF-8 BOM is prepended.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str)
            ? $str
            : self::bom() . $str;
    }
302
303
    /**
     * Changes the case of all keys in an array.
     *
     * The values are copied over untouched. If two keys become equal after the
     * case conversion, the later entry overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other
     *                                       value is treated like <strong>CASE_LOWER</strong></p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // everything that is not explicitly CASE_UPPER falls back to lower-casing
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: a by-reference loop would turn the input elements into
        // references and leave a dangling reference behind without any benefit
        foreach ($array as $key => $value) {
            $key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
340
341
    /**
     * Extract the text that sits between the first $start delimiter (searched
     * from $offset) and the next $end delimiter after it.
     *
     * An empty string is returned when either delimiter is missing or when
     * nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // decided before normalization: only a literal 'UTF-8' takes the plain mbstring route
        $use_mbstring = $encoding === 'UTF-8';
        if (!$use_mbstring) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $from = $use_mbstring
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($from === false) {
            return '';
        }

        // skip over the start delimiter itself
        $from += $use_mbstring
            ? (int) \mb_strlen($start)
            : (int) self::strlen($start, $encoding);

        $to = $use_mbstring
            ? \mb_strpos($str, $end, $from)
            : self::strpos($str, $end, $from, $encoding);
        if ($to === false || $to === $from) {
            return '';
        }

        $length = $to - $from;

        return (string) (
            $use_mbstring
                ? \mb_substr($str, $from, $length)
                : self::substr($str, $from, $length, $encoding)
        );
    }
406
407
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * The digits are converted byte by byte, so the input may be arbitrarily
     * long and its length does not have to be a multiple of eight: missing
     * leading zero bits are filled in. Leading zero bits are discarded and
     * characters other than "0" and "1" are ignored, as before.
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes, or an empty string for empty, all-zero
     *                or non-string input.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // keep only binary digits and drop leading zeros (the numeric value stays the same)
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to whole bytes, e.g. "1010" => "00001010"
        $bit_count = \strlen($bits);
        $bits = \str_pad($bits, $bit_count + (8 - $bit_count % 8) % 8, '0', \STR_PAD_LEFT);

        // convert 8 bits at a time: no float precision limit and no half-filled nibble
        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
433
434
    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark (three bytes).</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
450
451
    /**
     * @alias of UTF8::chr_map()
     *
     * Forwards both arguments unchanged and hands back whatever chr_map() produced.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
467
468
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // the default encoding is served by mbstring directly, any other one by our own substr()
        $character = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $character;
    }
488
489
    /**
     * Returns an array consisting of the characters in the string.
     *
     * The work is delegated to UTF8::str_split() with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
504
505
    /**
     * Detects once which UTF-8 related extensions and features are available
     * and records the findings in self::$SUPPORT.
     *
     * @return true|null
     *                   <p><strong>true</strong> when the detection ran in this call,
     *                   <strong>null</strong> when it had already been done before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the detection is done only once
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch mbstring (including its regex functions) to UTF-8
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // "mb_internal_encoding()" is called here as well when the polyfill is detected
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
561
562
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoized per code point and encoding. Code points up to
     * U+007F are served from the byte lookup table, everything above is built
     * via "IntlChar" when available, or otherwise assembled by hand from the
     * same table.
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input.</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // the parameter is not type-hinted, so non-integers have to be rejected here
        if (!\is_int($code_point) || $code_point <= 0) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Only U+0001 - U+007F are single bytes in UTF-8. U+0080 must not take this
        // shortcut: the table would hand out the lone byte 0x80, which is invalid UTF-8.
        if ($code_point <= 0x7F) { // only for "simple"-chars

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            // "IntlChar::chr()" reports failure with null, there is nothing to re-encode then
            if ($chr === null) {
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        // beyond the last Unicode code point no valid sequence can be built
        if ($code_point > 0x10FFFF) {
            return null;
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
687
688
    /**
     * Runs a callback over every character of a string and collects the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>Invoked once per character, with that character as its only argument.</p>
     * @param string   $str      <p>UTF-8 string whose characters are handed to the callback.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The callback results, one entry per character returned by str_split().</p>
     */
    public static function chr_map($callback, string $str): array
    {
        // split into characters first, then apply the callback to each of them
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
708
709
    /**
     * Builds a list with the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>One byte length per character; an empty array for an empty string.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // default: plain byte count
        $byte_counter = '\strlen';

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so count with an 8-bit charset instead
            $byte_counter = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        }

        return \array_map($byte_counter, self::str_split($str));
    }
744
745
    /**
     * Get a decimal code representation of a specific character.
     *
     * Only the first character of the input is decoded. An empty string yields 0.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the first character, or 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // nothing to decode: avoid unpack() on an empty buffer and reading offset 0 of an empty string
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            // "V" reads one unsigned 32-bit little-endian value, so four bytes must be present
            if ($chr_tmp !== false && isset($chr_tmp[3])) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        //
        // fallback: decode the UTF-8 byte sequence by hand
        //

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx - a missing continuation byte (truncated input) contributes zero bits
            // instead of triggering an out-of-range string offset access
            $continuation = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
            $code = ($code << 6) + ($continuation & ~0x80);
        }

        return $code;
    }
797
798
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>Passed through to int_to_hex() as the prefix.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity "&#0;" is handed to ord() as an empty string
        $input = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($input);

        return self::int_to_hex($code_point, $prefix);
    }
823
824
    /**
     * Alias of "UTF8::chr_to_decimal()": forwards the argument and returns its result unchanged.
     *
     * @param string $chr
     *
     * @psalm-pure
     *
     * @return int
     *
     * @see        UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
840
841
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending.
     *
     * The line ending is placed between the chunks only, so the result does not end with it.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
859
860
    /**
     * Strips every byte that is not part of a UTF-8 sequence and optionally applies further clean-ups.
     *
     * The optional steps run in a fixed order: diamond question mark, invisible characters,
     * whitespace, MS Word characters, BOM.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces;
     *                                                        only used in combination with
     *                                                        $normalize_whitespace.</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
     *                                                        question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
     *                                                        invisible url encoded characters e.g.: "%0B"; only used
     *                                                        in combination with $remove_invisible_characters.<br>
     *                                                        WARNING: maybe contains false-positives
     *                                                        e.g. aa%0Baa -> aaaa.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // the repetition is capped at 100 because the unbounded version
        // caused connection reset problem on larger strings
        //
        // group 1 captures well-formed sequences, groups 2 and 3 capture stray bytes;
        // replacing every match with "$1" keeps the former and drops the latter
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $str = (string) \preg_replace($utf8_filter, '$1', $str);

        // optional steps, in this exact order

        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str);
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
942
943
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * The input is cast to string, passed through fix_simple_utf8() and then through clean()
     * with a fixed set of options (see the call below).
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        // remove all none UTF-8 symbols, plus the options named per argument
        return self::clean(
            $repaired,
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true   // $replace_diamond_question_mark
            // $remove_invisible_characters keeps its default (true)
        );
    }
980
981
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * A string is first split into characters via str_split(); an array is used as given.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array if the input is empty or neither a string nor an array.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($arg) || $arg === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $arg);

        if (!$use_u_style) {
            return $code_points;
        }

        // int_to_hex() is called with its default prefix
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1042
1043
    /**
     * Replaces every run of whitespace characters with a single space and trims the result.
     * This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
        } else {
            // no mbstring: use the library's own regex helper
            $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');
        }

        return \trim($collapsed);
    }
1064
1065
    /**
     * Returns count of characters used in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Passed through to str_split(); presumably set to
     *                                        false, if you don't want to use "mb_" functions there.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // one array entry per character ...
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... then tally identical entries
        return \array_count_values($characters);
    }
1094
1095
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" applied before invalid characters are stripped.</p>
     * @param bool     $strip_tags <p>If true, strip_tags() is applied to the input.</p>
     * @param bool     $strtolower <p>If true, strtolower() is applied to the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: without a usable identifier a unique one is generated ...
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $placeholder_hits = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $placeholder_hits);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Replace temporary placeholder '##' with '__' only if the original
        // identifier contained '__'.
        if ($placeholder_hits > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher
        // We strip out any character not in the above list
        // (the character class below stops at U+FFFF).
        $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $str = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $str);

        // finally drop leading and trailing hyphens
        return \trim($str, '-');
    }
1170
1171
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern below is replaced with an empty string.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The CSS without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
1188
1189
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1203
1204
    /**
     * Converts an int value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#...;") which is then decoded.
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
1223
1224
    /**
     * Decodes a MIME header field
     *
     * Decoding is done by iconv_mime_decode() in "continue on error" mode.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // the two most common names are used as given, everything else is normalized first
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        $decoded = \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);

        return $decoded;
    }
1245
1246
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * Each letter A-Z is mapped onto the regional indicator symbol with the same alphabet
     * position (starting at U+1F1E6). The code is only checked for its shape (two ASCII
     * letters, case-insensitive), not against the list of assigned country codes.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
     *
     * @return string
     *                <p>Emoji or empty string on error.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Anything but exactly two ASCII letters cannot be mapped: digits, punctuation or
        // multi-byte characters would otherwise produce code points outside the
        // regional indicator block instead of the documented empty string.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A
        $asciiOffset = 0x41;   // "A"

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1274
1275
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key set that matches the way the string was encoded
        $search = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace($search, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
1315
1316
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key set the emoji characters are replaced with
        $replacements = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, $replacements, $str);
    }
1356
1357
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets, the pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES" are handled
     * (names are compared after normalize_encoding() has been applied).
     * If no conversion succeeds, the input string is returned as it is.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the current encoding is detected via
     *                                              str_detect_encoding() and a successful detection replaces
     *                                              $from_encoding; if nothing is detected, the result of
     *                                              to_utf8() is returned.<br> When false, $from_encoding is
     *                                              used and the detection only runs if it is empty.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException <p>If $to_encoding is "JSON" and json_encode() fails.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // nothing to do if source and target are explicitly the same
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        //
        // pseudo encodings: "to" returns directly, "from" decodes and then continues with autodetect
        //

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            // NOTE(review): json_decode() may yield a non-string value for non-string JSON
            // documents; confirm how callers expect that case to be handled.
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // strict base64_decode() returns false for invalid input; keep $str a string
            // (false becomes '') so the string-typed steps below never receive a bool
            $str = (string) \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        if ($from_encoding_auto_detected !== false) {
            // a successful detection always wins over the given $from_encoding
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // the two most common conversions are handled by the library's own helpers

        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        // warn only; the conversion is still attempted via iconv below
        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            // an empty / falsy result falls through to iconv
            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $return = @\iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        // last resort: hand back the (possibly decoded) input
        return $str;
    }
1535
1536
    /**
     * Encodes a string as a MIME header value by delegating to iconv_mime_encode()
     * with an empty field name.
     *
     * @param string $str               <p>The header value that should be encoded.</p>
     * @param string $from_charset      [optional] <p>Charset of the input string.</p>
     * @param string $to_charset        [optional] <p>Charset of the encoded output.</p>
     * @param string $transfer_encoding [optional] <p>The encoding scheme, forwarded as "scheme".</p>
     * @param string $linefeed          [optional] <p>The line break sequence, forwarded as "line-break-chars".</p>
     * @param int    $indent            [optional] <p>The maximum line length, forwarded as "line-length".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // these two charset names are used as-is, every other name is normalized first
        $charsets_used_as_is = ['UTF-8', 'CP850'];

        if (!\in_array($from_charset, $charsets_used_as_is, true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, $charsets_used_as_is, true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1579
1580
    /**
     * Creates an extract from a sentence; if the search-string is found, the extract is
     * cut around it, otherwise the text is cut from the beginning.
     *
     * Cuts are made at the next space or dot, and skipped text is marked with the replacer.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // For "UTF-8" the native "mb_*" functions are called directly,
        // every other encoding goes through the wrappers of this class.

        // character count of a string
        $char_count = static function (string $text) use ($is_utf8, $encoding): int {
            return (int) ($is_utf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };

        // part of the input string, or false if the wrapper fails
        $slice = static function (int $start, int $size) use ($str, $is_utf8, $encoding) {
            return $is_utf8
                ? \mb_substr($str, $start, $size)
                : self::substr($str, $start, $size, $encoding);
        };

        // Smaller of the next space / next dot position, searched from the offset.
        // NOTE: a failed lookup (false) is the minimum, so this yields 0 whenever
        //       one of the two characters is missing.
        $next_boundary = static function (int $offset) use ($str, $is_utf8, $encoding): int {
            if ($is_utf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // --- no search-string: cut the text from the beginning

        if ($search === '') {
            $end = $length > 0 ? \min($length - 1, $char_count($str)) : 0;

            $pos = $next_boundary($end);
            if (!$pos) {
                return $str;
            }

            $str_sub = $slice(0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // --- with search-string: try to place it in the middle of the extract

        $word_position = (int) (
            $is_utf8
                ? \mb_stripos($str, $search)
                : self::stripos($str, $search, 0, $encoding)
        );
        $half_side = (int) ($word_position - $length / 2 + $char_count($search) / 2);

        // start at the last space / dot in front of the calculated window
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $slice(0, $half_side);
            if ($half_text !== false) {
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        $total_length = (int) self::strlen($str, $encoding);

        if ($word_position && $half_side > 0) {
            // the search offset must not exceed the string length
            $offset = \min($pos_start + $length - 1, $total_length);
            $pos_end = $next_boundary($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no usable end boundary: keep everything from the start position on
                $str_sub = $slice($pos_start, $char_count($str));

                return $str_sub !== false
                    ? $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars)
                    : '';
            }

            $str_sub = $slice($pos_start, $pos_end);

            return $str_sub !== false
                ? $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text
                : '';
        }

        // search-string not found (or at the very beginning): cut from the beginning
        $offset = \min($length - 1, $total_length);
        $pos_end = $next_boundary($offset);

        if (!$pos_end) {
            return $str;
        }

        $str_sub = $slice(0, $pos_end);

        return $str_sub !== false
            ? \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text
            : '';
    }
1769
1770
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * NOTE: The filename is sanitized before it is used: NUL bytes and everything that looks
     *       like a tag ("<...>") is removed and quotes are replaced by "&#39;" / "&#34;".
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Prior to PHP 5, this parameter is called
     *                                        use_include_path and is a bool.
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
     *                                        to trigger include path
     *                                        search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If no context is given and
     *                                        a timeout is set, a context with that "http"-timeout
     *                                        is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts, null is handled as 0.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Sanitize the filename without "FILTER_SANITIZE_STRING": that filter is deprecated
        // since PHP 8.1 and would trigger a deprecation notice on every call. The two steps
        // below do the same work: remove NUL bytes + tags, then encode single / double quotes.
        $filename = \preg_replace('/\x00|<[^>]*>?/', '', $filename);
        if ($filename === null) {
            // the regex engine failed, handle it like a failed filter
            return false;
        }
        $filename = \str_replace(["'", '"'], ['&#39;', '&#34;'], $filename);

        // apply the timeout only if the caller did not provide a stream context
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // pass the length only if it was really given
        if (\is_int($max_length)) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8) {
            // convert text data only: not detected as binary, or detected as UTF-16 / UTF-32
            if (
                !self::is_binary($data, true)
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $from_encoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1872
1873
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            // the BOM detection itself is done on the string
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1896
1897
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively, strings are normalized and
     * every other type is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var                <p>The value that should be filtered.</p>
     * @param int                 $normalization_form <p>The normalization form for the "Normalizer".</p>
     * @param string              $leading_combining  <p>The prefix for strings starting with a combining mark.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // filter every element / property in place
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // pure ASCII strings need no normalization
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // placeholder, so that the "isset()" check below passes
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // use the normalized string, or re-encode the input if there is no usable result
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));
                if (
                    $var[0] >= "\x80"
                    &&
                    isset($normalized[0], $leading_combining[0])
                    &&
                    \preg_match('/^\\p{Mn}/u', $var)
                ) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1972
1973
    /**
     * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and passes
     * the result through UTF8::filter().
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options !== null && \func_num_args() >= 4) {
            // forward the options only if the caller really provided them
            $value = \filter_input($type, $variable_name, $filter, $options);
        } else {
            $value = \filter_input($type, $variable_name, $filter);
        }

        return self::filter($value);
    }
2028
2029
    /**
     * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the result
     * through UTF8::filter().
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, ['foo' => FILTER_SANITIZE_STRING]); // ['foo' => 'bar']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int        $type       <p>
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                               <b>INPUT_ENV</b>.
     *                               </p>
     * @param array|null $definition [optional] <p>
     *                               An array defining the arguments. A valid key is a string
     *                               containing a variable name and a valid value is either a filter type, or an array
     *                               optionally specifying the filter, flags and options. If the value is an
     *                               array, valid keys are filter which specifies the
     *                               filter type,
     *                               flags which specifies any flags that apply to the
     *                               filter, and options which specifies any options that
     *                               apply to the filter.
     *                               </p>
     *                               <p>
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
     *                               input array are filtered by this filter.
     *                               </p>
     * @param bool       $add_empty  [optional] <p>
     *                               Add missing keys as <b>NULL</b> to the return value.
     *                               </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition !== null && \func_num_args() >= 2) {
            // forward the definition only if the caller really provided one
            $values = \filter_input_array($type, $definition, $add_empty);
        } else {
            $values = \filter_input_array($type);
        }

        return self::filter($values);
    }
2090
2091
    /**
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter and passes the result through UTF8::filter().
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                            'options' => array(
     *                                                'default' => 3, // value to return if the filter fails
     *                                                // other options here
     *                                                'min_range' => 0
     *                                            ),
     *                                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                            array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                            // Expected format: Surname, GivenNames
     *                                            if (strpos($value, ", ") === false) return false;
     *                                            list($surname, $givennames) = explode(", ", $value, 2);
     *                                            $empty = (empty($surname) || empty($givennames));
     *                                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                            if ($empty || $notstrings) {
     *                                                return false;
     *                                            } else {
     *                                                return $value;
     *                                            }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $filtered = \func_num_args() >= 3
            // forward the options only if the caller really provided them
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
2170
2171
    /**
2172
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2173
     *
2174
     * Gets multiple variables and optionally filters them.
2175
     *
2176
     * EXAMPLE: <code>
2177
     * $filters = [
2178
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2179
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2180
     *     'email' => FILTER_VALIDATE_EMAIL,
2181
     * ];
2182
     *
2183
     * $data = [
2184
     *     'name' => 'κόσμε',
2185
     *     'age' => '18',
2186
     *     'email' => 'foo@bar.de'
2187
     * ];
2188
     *
2189
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@bar.de']
2190
     * </code>
2191
     *
2192
     * @see http://php.net/manual/en/function.filter-var-array.php
2193
     *
2194
     * @param array<mixed>   $data       <p>
2195
     *                                   An array with string keys containing the data to filter.
2196
     *                                   </p>
2197
     * @param array|int|null $definition [optional] <p>
2198
     *                                   An array defining the arguments. A valid key is a string
2199
     *                                   containing a variable name and a valid value is either a
2200
     *                                   filter type, or an
2201
     *                                   array optionally specifying the filter, flags and options.
2202
     *                                   If the value is an array, valid keys are filter
2203
     *                                   which specifies the filter type,
2204
     *                                   flags which specifies any flags that apply to the
2205
     *                                   filter, and options which specifies any options that
2206
     *                                   apply to the filter. See the example below for a better understanding.
2207
     *                                   </p>
2208
     *                                   <p>
2209
     *                                   This parameter can be also an integer holding a filter constant. Then all values
2210
     *                                   in the input array are filtered by this filter.
2211
     *                                   </p>
2212
     * @param bool           $add_empty  [optional] <p>
2213
     *                                   Add missing keys as <b>NULL</b> to the return value.
2214
     *                                   </p>
2215
     *
2216
     * @psalm-pure
2217
     *
2218
     * @return mixed
2219
     *               <p>
2220
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2221
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2222
     *               set.
2223
     *               </p>
2224
     */
2225 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // A null $definition means "no explicit definition was given". The native
        // function does not accept null as a definition, so map it to the native
        // default (FILTER_DEFAULT). This also covers the one-argument call, because
        // FILTER_DEFAULT and $add_empty = true are exactly the native defaults,
        // hence no argument-count inspection is needed anymore.
        $filtered = \filter_var_array(
            $data,
            $definition ?? \FILTER_DEFAULT,
            $add_empty
        );

        // hand the native result over to "UTF8::filter()" (the normalization step of this wrapper)
        return self::filter($filtered);
    }
2241
2242
    /**
2243
     * Checks whether finfo is available on the server.
2244
     *
2245
     * @psalm-pure
2246
     *
2247
     * @return bool
2248
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2249
     *
2250
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
2251
     */
2252
    public static function finfo_loaded(): bool
    {
        // report whether a class named "finfo" is declared in this runtime
        $is_available = \class_exists('finfo');

        return $is_available;
    }
2256
2257
    /**
2258
     * Returns the first $n characters of the string.
2259
     *
2260
     * @param string $str      <p>The input string.</p>
2261
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2262
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2263
     *
2264
     * @psalm-pure
2265
     *
2266
     * @return string
2267
     */
2268 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing can be extracted from an empty string or with a non-positive count
        $nothing_to_extract = $str === '' || $n <= 0;
        if ($nothing_to_extract) {
            return '';
        }

        // the default encoding goes straight to "mb_substr()",
        // every other encoding is delegated to "UTF8::substr()"
        $prefix = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $prefix;
    }
2283
2284
    /**
2285
     * Check if the number of Unicode characters isn't greater than the specified integer.
2286
     *
2287
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true</code>
2288
     *
2289
     * @param string $str      the original string to be checked
2290
     * @param int    $box_size the size in number of chars to be checked against string
2291
     *
2292
     * @psalm-pure
2293
     *
2294
     * @return bool
2295
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2296
     */
2297 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // length as reported by "UTF8::strlen()", forced to an integer
        $char_count = (int) self::strlen($str);

        // the string fits when the box is at least as large as the string
        return $box_size >= $char_count;
    }
2301
2302
    /**
2303
     * Try to fix simple broken UTF-8 strings.
2304
     *
2305
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2306
     *
2307
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
2308
     *
2309
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2310
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2311
     * See: http://en.wikipedia.org/wiki/Windows-1252
2312
     *
2313
     * @param string $str <p>The input string</p>
2314
     *
2315
     * @psalm-pure
2316
     *
2317
     * @return string
2318
     */
2319 47
    public static function fix_simple_utf8(string $str): string
    {
        // an empty string cannot contain anything to repair
        if ($str === '') {
            return '';
        }

        /**
         * Search side of the replacement table (filled on first use).
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * Replacement side of the replacement table (filled on first use).
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        $cache_is_cold = $SEARCH_CACHE === null;
        if ($cache_is_cold) {
            // lazy-load the "utf8_fix" data set once per process
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // the keys are the broken sequences, the values their replacements
            $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
        }

        \assert(\is_array($REPLACE_CACHE));

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2352
2353
    /**
2354
     * Fix a double (or multiple) encoded UTF8 string.
2355
     *
2356
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
2357
     *
2358
     * @param string|string[] $str you can use a string or an array of strings
2359
     *
2360
     * @psalm-pure
2361
     *
2362
     * @return string|string[]
2363
     *                         Will return the fixed input-"array" or
2364
     *                         the fixed input-"string"
2365
     *
2366
     * @psalm-suppress InvalidReturnType
2367
     */
2368 2
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // repair every element recursively, the keys stay untouched
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;

        // there is nothing to decode in an empty string
        if ($str === '') {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        // decode + re-encode until a pass no longer changes the string
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8(
                self::utf8_decode($str, true)
            );
        } while ($previous !== $str);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
2399
2400
    /**
2401
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2402
     *
2403
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2404
     *
2405
     * @param string $char
2406
     *
2407
     * @psalm-pure
2408
     *
2409
     * @return string
2410
     *                <p>'RTL' or 'LTR'.</p>
2411
     */
2412 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltr_codes = [0, 11, 12, 20];
            $rtl_codes = [1, 13, 14, 15, 21];

            if (\in_array($intl_direction, $ltr_codes, true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $rtl_codes, true)) {
                return 'RTL';
            }

            // any other code falls through to the table lookup below
        }

        $c = static::chr_to_decimal($char);

        // only code points inside [0x5be, 0x10b7f] can be right-to-left here
        $is_inside_rtl_window = $c >= 0x5be && $c <= 0x10b7f;
        if (!$is_inside_rtl_window) {
            return 'LTR';
        }

        // single code points that are right-to-left
        $rtl_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtl_code_points, true)) {
            return 'RTL';
        }

        // inclusive [first, last] code point ranges that are right-to-left
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            list($first, $last) = $range;

            if ($c >= $first && $c <= $last) {
                return 'RTL';
            }
        }

        // everything that is not listed above is treated as left-to-right
        return 'LTR';
    }
2518
2519
    /**
2520
     * Check for php-support.
2521
     *
2522
     * @param string|null $key
2523
     *
2524
     * @psalm-pure
2525
     *
2526
     * @return mixed
2527
     *               Return the full support-"array", if $key === null<br>
2528
     *               return bool-value, if $key is used and available<br>
2529
     *               otherwise return <strong>null</strong>
2530
     */
2531 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // lazy-load the transliterator list before a single key is looked up
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // unknown keys (and keys holding null) yield null
            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        // without a key the support-array is returned as it currently is
        return self::$SUPPORT;
    }
2545
2546
    /**
2547
     * Warning: this method only works for some file-types (png, jpg)
2548
     *          if you need more supported types, please use e.g. "finfo"
2549
     *
2550
     * @param string $str
2551
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
2552
     *
2553
     * @psalm-pure
2554
     *
2555
     * @return null[]|string[]
2556
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
2557
     *
2558
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
2559
     */
2560 40
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // the signature check needs the first two bytes of the input
        if (!isset($str[1])) {
            return $fallback;
        }

        // Build the lookup code by concatenating the decimal values of the first
        // two bytes, e.g. 0xFF 0xD8 => "255" . "216" => 255216.
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        if ($type_code === 255216) {
            $ext = 'jpg';
            $mime = 'image/jpeg';
        } elseif ($type_code === 13780) {
            $ext = 'png';
            $mime = 'image/png';
        } else {
            // unknown signature
            return $fallback;
        }

        return [
            'ext'  => $ext,
            'mime' => $mime,
            'type' => 'binary',
        ];
    }
2627
2628
    /**
2629
     * @param int    $length         <p>Length of the random string.</p>
2630
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2631
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2632
     *
2633
     * @return string
2634
     */
2635 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The default encoding uses the plain "mb_*" functions; every other encoding
        // is normalized first and then handled via the "UTF8::*" counterparts.
        $use_mb_functions = $encoding === 'UTF-8';

        if ($use_mb_functions) {
            $max_length = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $max_length = (int) self::strlen($possible_chars, $encoding);
        }

        // without any candidate character there is nothing to pick from
        if ($max_length === 0) {
            return '';
        }

        $str = '';
        $last_index = $max_length - 1;

        // the counter only advances when a character was really appended
        for ($i = 0; $i < $length;) {
            try {
                $rand_int = \random_int(0, $last_index);
            } catch (\Exception $e) {
                // fall back to "mt_rand()" when "random_int()" throws
                /** @noinspection RandomApiMigrationInspection */
                $rand_int = \mt_rand(0, $last_index);
            }

            $char = $use_mb_functions
                ? \mb_substr($possible_chars, $rand_int, 1)
                : self::substr($possible_chars, $rand_int, 1, $encoding);

            if ($char === false) {
                continue;
            }

            $str .= $char;
            ++$i;
        }

        return $str;
    }
2692
2693
    /**
2694
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
2695
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2696
     *
2697
     * @return string
2698
     */
2699 1
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // fall back to "mt_rand()" when "random_int()" throws
            /** @noinspection RandomApiMigrationInspection */
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        // everything that is mixed into the identifier, in concatenation order
        $entropy_parts = [
            $random_part,
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $extra_entropy,
        ];
        $unique_helper = \implode('', $entropy_parts);

        $unique_string = \uniqid($unique_helper, true);

        // optionally hash the identifier together with the helper string
        return $use_md5
            ? \md5($unique_string . $unique_helper)
            : $unique_string;
    }
2722
2723
    /**
2724
     * alias for "UTF8::string_has_bom()"
2725
     *
2726
     * @param string $str
2727
     *
2728
     * @psalm-pure
2729
     *
2730
     * @return bool
2731
     *
2732
     * @see        UTF8::string_has_bom()
2733
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
2734
     */
2735 2
    public static function hasBom(string $str): bool
    {
        // deprecated alias: forwards the call to "UTF8::string_has_bom()"
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2739
2740
    /**
2741
     * Returns true if the string contains a lower case char, false otherwise.
2742
     *
2743
     * @param string $str <p>The input string.</p>
2744
     *
2745
     * @psalm-pure
2746
     *
2747
     * @return bool
2748
     *              <p>Whether or not the string contains a lower case character.</p>
2749
     */
2750 47
    public static function has_lowercase(string $str): bool
    {
        // anything, followed by one character of the "lower" class
        $pattern = '.*[[:lower:]]';

        // without mbstring the check is delegated to "UTF8::str_matches_pattern()"
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2759
2760
    /**
2761
     * Returns true if the string contains whitespace, false otherwise.
2762
     *
2763
     * @param string $str <p>The input string.</p>
2764
     *
2765
     * @psalm-pure
2766
     *
2767
     * @return bool
2768
     *              <p>Whether or not the string contains whitespace.</p>
2769
     */
2770 11
    public static function has_whitespace(string $str): bool
    {
        // anything, followed by one character of the "space" class
        $pattern = '.*[[:space:]]';

        // without mbstring the check is delegated to "UTF8::str_matches_pattern()"
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2779
2780
    /**
2781
     * Returns true if the string contains an upper case char, false otherwise.
2782
     *
2783
     * @param string $str <p>The input string.</p>
2784
     *
2785
     * @psalm-pure
2786
     *
2787
     * @return bool
2788
     *              <p>Whether or not the string contains an upper case character.</p>
2789
     */
2790 12
    public static function has_uppercase(string $str): bool
    {
        // anything, followed by one character of the "upper" class
        $pattern = '.*[[:upper:]]';

        // without mbstring the check is delegated to "UTF8::str_matches_pattern()"
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2799
2800
    /**
2801
     * Converts a hexadecimal value into a UTF-8 character.
2802
     *
2803
     * INFO: opposite to UTF8::chr_to_hex()
2804
     *
2805
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
2806
     *
2807
     * @param string $hexdec <p>The hexadecimal value.</p>
2808
     *
2809
     * @psalm-pure
2810
     *
2811
     * @return false|string one single UTF-8 character
2812
     */
2813 4
    public static function hex_to_chr(string $hexdec)
    {
        // "hexdec()" ignores non-hex characters; "@" silences the notice it may emit for them
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2818
2819
    /**
2820
     * Converts hexadecimal U+xxxx code point representation to integer.
2821
     *
2822
     * INFO: opposite to UTF8::int_to_hex()
2823
     *
2824
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
2825
     *
2826
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2827
     *
2828
     * @psalm-pure
2829
     *
2830
     * @return false|int
2831
     *                   <p>The code point, or false on failure.</p>
2832
     */
2833 2
    public static function hex_to_int($hexdec)
    {
        // the parameter is untyped, so normalize it to a string first
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Accepted forms: an optional "\u" or "U+" prefix followed by 4-6 hex digits.
        // Only real hexadecimal digits are allowed: with a wider character class,
        // input such as "zzzz" would match and "intval()" would silently turn it
        // into 0 instead of reporting a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2848
2849
    /**
2850
     * alias for "UTF8::html_entity_decode()"
2851
     *
2852
     * @param string   $str
2853
     * @param int|null $flags
2854
     * @param string   $encoding
2855
     *
2856
     * @psalm-pure
2857
     *
2858
     * @return string
2859
     *
2860
     * @see        UTF8::html_entity_decode()
2861
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
2862
     */
2863 2
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // deprecated alias: forwards all arguments to "UTF8::html_entity_decode()"
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2870
2871
    /**
2872
     * Converts a UTF-8 string to a series of HTML numbered entities.
2873
     *
2874
     * INFO: opposite to UTF8::html_decode()
2875
     *
2876
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
2877
     *
2878
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2879
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2880
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2881
     *
2882
     * @psalm-pure
2883
     *
2884
     * @return string HTML numbered entities
2885
     */
2886 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // "UTF-8" and "CP850" are used as given, everything else is normalized first
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // when ASCII is kept, the encoded range starts right after it (0x80)
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // first attempt without an explicit encoding argument
                /** @var false|string|null $entities - needed for PhpStan (stubs error) */
                $entities = \mb_encode_numericentity($str, $convmap);
                if ($entities !== false && $entities !== null) {
                    return $entities;
                }
            }

            // (second) attempt with the explicit encoding
            /** @var false|string|null $entities - needed for PhpStan (stubs error) */
            $entities = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($entities !== false && $entities !== null) {
                return $entities;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_single_char = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        $encoded_chars = \array_map($encode_single_char, self::str_split($str));

        return \implode('', $encoded_chars);
    }
2943
2944
    /**
2945
     * UTF-8 version of html_entity_decode()
2946
     *
2947
     * The reason we are not using html_entity_decode() by itself is because
2948
     * while it is not technically correct to leave out the semicolon
2949
     * at the end of an entity most browsers will still interpret the entity
2950
     * correctly. html_entity_decode() does not convert entities without
2951
     * semicolons, so we are left with our own little solution here. Bummer.
2952
     *
2953
     * Convert all HTML entities to their applicable characters.
2954
     *
2955
     * INFO: opposite to UTF8::html_encode()
2956
     *
2957
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2958
     *
2959
     * @see http://php.net/manual/en/function.html-entity-decode.php
2960
     *
2961
     * @param string   $str      <p>
2962
     *                           The input string.
2963
     *                           </p>
2964
     * @param int|null $flags    [optional] <p>
2965
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2966
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2967
     *                           <table>
2968
     *                           Available <i>flags</i> constants
2969
     *                           <tr valign="top">
2970
     *                           <td>Constant Name</td>
2971
     *                           <td>Description</td>
2972
     *                           </tr>
2973
     *                           <tr valign="top">
2974
     *                           <td><b>ENT_COMPAT</b></td>
2975
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2976
     *                           </tr>
2977
     *                           <tr valign="top">
2978
     *                           <td><b>ENT_QUOTES</b></td>
2979
     *                           <td>Will convert both double and single quotes.</td>
2980
     *                           </tr>
2981
     *                           <tr valign="top">
2982
     *                           <td><b>ENT_NOQUOTES</b></td>
2983
     *                           <td>Will leave both double and single quotes unconverted.</td>
2984
     *                           </tr>
2985
     *                           <tr valign="top">
2986
     *                           <td><b>ENT_HTML401</b></td>
2987
     *                           <td>
2988
     *                           Handle code as HTML 4.01.
2989
     *                           </td>
2990
     *                           </tr>
2991
     *                           <tr valign="top">
2992
     *                           <td><b>ENT_XML1</b></td>
2993
     *                           <td>
2994
     *                           Handle code as XML 1.
2995
     *                           </td>
2996
     *                           </tr>
2997
     *                           <tr valign="top">
2998
     *                           <td><b>ENT_XHTML</b></td>
2999
     *                           <td>
3000
     *                           Handle code as XHTML.
3001
     *                           </td>
3002
     *                           </tr>
3003
     *                           <tr valign="top">
3004
     *                           <td><b>ENT_HTML5</b></td>
3005
     *                           <td>
3006
     *                           Handle code as HTML 5.
3007
     *                           </td>
3008
     *                           </tr>
3009
     *                           </table>
3010
     *                           </p>
3011
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3012
     *
3013
     * @psalm-pure
3014
     *
3015
     * @return string the decoded string
3016
     */
3017 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Strings shorter than four bytes (examples: &; || &x;) or without any "&"
        // are returned unchanged.
        if (\strlen($str) < 4 || \strpos($str, '&') === false) {
            return $str;
        }

        // "UTF-8" and "CP850" are used as given, everything else is normalized first
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // default: convert both quote styles and handle the input as HTML 5
        $flags = $flags ?? (\ENT_QUOTES | \ENT_HTML5);

        $is_natively_supported = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
        if (!$is_natively_supported && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode repeatedly (e.g. "&amp;lt;" => "&lt;" => "<") until either no "&"
        // is left or a pass does not change the string anymore
        while (\strpos($str, '&') !== false) {
            $str_before_pass = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (adds the missing ";" to numeric entities so that they can be decoded)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($str_before_pass === $str) {
                break;
            }
        }

        return $str;
    }
3077
3078
    /**
     * Escape a string for HTML output by delegating to "UTF8::htmlspecialchars()".
     *
     * The call always uses ENT_QUOTES | ENT_SUBSTITUTE as flags.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3096
3097
    /**
     * Strip html-tags that have no content (or only whitespace) between
     * the opening and the closing tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without empty tags; a failed regex replacement is cast to an empty string.</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // opening tag, optional whitespace, closing tag
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pattern, '', $str);

        return (string) $stripped;
    }
3116
3117
    /**
3118
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3119
     *
3120
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3121
     *
3122
     * @see http://php.net/manual/en/function.htmlentities.php
3123
     *
3124
     * @param string $str           <p>
3125
     *                              The input string.
3126
     *                              </p>
3127
     * @param int    $flags         [optional] <p>
3128
     *                              A bitmask of one or more of the following flags, which specify how to handle
3129
     *                              quotes, invalid code unit sequences and the used document type. The default is
3130
     *                              ENT_COMPAT | ENT_HTML401.
3131
     *                              <table>
3132
     *                              Available <i>flags</i> constants
3133
     *                              <tr valign="top">
3134
     *                              <td>Constant Name</td>
3135
     *                              <td>Description</td>
3136
     *                              </tr>
3137
     *                              <tr valign="top">
3138
     *                              <td><b>ENT_COMPAT</b></td>
3139
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3140
     *                              </tr>
3141
     *                              <tr valign="top">
3142
     *                              <td><b>ENT_QUOTES</b></td>
3143
     *                              <td>Will convert both double and single quotes.</td>
3144
     *                              </tr>
3145
     *                              <tr valign="top">
3146
     *                              <td><b>ENT_NOQUOTES</b></td>
3147
     *                              <td>Will leave both double and single quotes unconverted.</td>
3148
     *                              </tr>
3149
     *                              <tr valign="top">
3150
     *                              <td><b>ENT_IGNORE</b></td>
3151
     *                              <td>
3152
     *                              Silently discard invalid code unit sequences instead of returning
3153
     *                              an empty string. Using this flag is discouraged as it
3154
     *                              may have security implications.
3155
     *                              </td>
3156
     *                              </tr>
3157
     *                              <tr valign="top">
3158
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3159
     *                              <td>
3160
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3161
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3162
     *                              string.
3163
     *                              </td>
3164
     *                              </tr>
3165
     *                              <tr valign="top">
3166
     *                              <td><b>ENT_DISALLOWED</b></td>
3167
     *                              <td>
3168
     *                              Replace invalid code points for the given document type with a
3169
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3170
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3171
     *                              instance, to ensure the well-formedness of XML documents with
3172
     *                              embedded external content.
3173
     *                              </td>
3174
     *                              </tr>
3175
     *                              <tr valign="top">
3176
     *                              <td><b>ENT_HTML401</b></td>
3177
     *                              <td>
3178
     *                              Handle code as HTML 4.01.
3179
     *                              </td>
3180
     *                              </tr>
3181
     *                              <tr valign="top">
3182
     *                              <td><b>ENT_XML1</b></td>
3183
     *                              <td>
3184
     *                              Handle code as XML 1.
3185
     *                              </td>
3186
     *                              </tr>
3187
     *                              <tr valign="top">
3188
     *                              <td><b>ENT_XHTML</b></td>
3189
     *                              <td>
3190
     *                              Handle code as XHTML.
3191
     *                              </td>
3192
     *                              </tr>
3193
     *                              <tr valign="top">
3194
     *                              <td><b>ENT_HTML5</b></td>
3195
     *                              <td>
3196
     *                              Handle code as HTML 5.
3197
     *                              </td>
3198
     *                              </tr>
3199
     *                              </table>
3200
     *                              </p>
3201
     * @param string $encoding      [optional] <p>
3202
     *                              Like <b>htmlspecialchars</b>,
3203
     *                              <b>htmlentities</b> takes an optional third argument
3204
     *                              <i>encoding</i> which defines encoding used in
3205
     *                              conversion.
3206
     *                              Although this argument is technically optional, you are highly
3207
     *                              encouraged to specify the correct value for your code.
3208
     *                              </p>
3209
     * @param bool   $double_encode [optional] <p>
3210
     *                              When <i>double_encode</i> is turned off PHP will not
3211
     *                              encode existing html entities. The default is to convert everything.
3212
     *                              </p>
3213
     *
3214
     * @psalm-pure
3215
     *
3216
     * @return string
3217
     *                <p>
3218
     *                The encoded string.
3219
     *                <br><br>
3220
     *                If the input <i>string</i> contains an invalid code unit
3221
     *                sequence within the given <i>encoding</i> an empty string
3222
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3223
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3224
     *                </p>
3225
     */
3226 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; every other name is normalized first.
        $needs_normalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * The native htmlentities() does not convert a backslash into an html entity,
         * so every backslash is replaced by its numeric entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // Final pass through "UTF8::html_encode()" with the same encoding.
        return self::html_encode($encoded, true, $encoding);
    }
3255
3256
    /**
3257
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3258
     *
3259
     * INFO: Take a look at "UTF8::htmlentities()"
3260
     *
3261
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3262
     *
3263
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3264
     *
3265
     * @param string $str           <p>
3266
     *                              The string being converted.
3267
     *                              </p>
3268
     * @param int    $flags         [optional] <p>
3269
     *                              A bitmask of one or more of the following flags, which specify how to handle
3270
     *                              quotes, invalid code unit sequences and the used document type. The default is
3271
     *                              ENT_COMPAT | ENT_HTML401.
3272
     *                              <table>
3273
     *                              Available <i>flags</i> constants
3274
     *                              <tr valign="top">
3275
     *                              <td>Constant Name</td>
3276
     *                              <td>Description</td>
3277
     *                              </tr>
3278
     *                              <tr valign="top">
3279
     *                              <td><b>ENT_COMPAT</b></td>
3280
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3281
     *                              </tr>
3282
     *                              <tr valign="top">
3283
     *                              <td><b>ENT_QUOTES</b></td>
3284
     *                              <td>Will convert both double and single quotes.</td>
3285
     *                              </tr>
3286
     *                              <tr valign="top">
3287
     *                              <td><b>ENT_NOQUOTES</b></td>
3288
     *                              <td>Will leave both double and single quotes unconverted.</td>
3289
     *                              </tr>
3290
     *                              <tr valign="top">
3291
     *                              <td><b>ENT_IGNORE</b></td>
3292
     *                              <td>
3293
     *                              Silently discard invalid code unit sequences instead of returning
3294
     *                              an empty string. Using this flag is discouraged as it
3295
     *                              may have security implications.
3296
     *                              </td>
3297
     *                              </tr>
3298
     *                              <tr valign="top">
3299
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3300
     *                              <td>
3301
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3302
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3303
     *                              string.
3304
     *                              </td>
3305
     *                              </tr>
3306
     *                              <tr valign="top">
3307
     *                              <td><b>ENT_DISALLOWED</b></td>
3308
     *                              <td>
3309
     *                              Replace invalid code points for the given document type with a
3310
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3311
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3312
     *                              instance, to ensure the well-formedness of XML documents with
3313
     *                              embedded external content.
3314
     *                              </td>
3315
     *                              </tr>
3316
     *                              <tr valign="top">
3317
     *                              <td><b>ENT_HTML401</b></td>
3318
     *                              <td>
3319
     *                              Handle code as HTML 4.01.
3320
     *                              </td>
3321
     *                              </tr>
3322
     *                              <tr valign="top">
3323
     *                              <td><b>ENT_XML1</b></td>
3324
     *                              <td>
3325
     *                              Handle code as XML 1.
3326
     *                              </td>
3327
     *                              </tr>
3328
     *                              <tr valign="top">
3329
     *                              <td><b>ENT_XHTML</b></td>
3330
     *                              <td>
3331
     *                              Handle code as XHTML.
3332
     *                              </td>
3333
     *                              </tr>
3334
     *                              <tr valign="top">
3335
     *                              <td><b>ENT_HTML5</b></td>
3336
     *                              <td>
3337
     *                              Handle code as HTML 5.
3338
     *                              </td>
3339
     *                              </tr>
3340
     *                              </table>
3341
     *                              </p>
3342
     * @param string $encoding      [optional] <p>
3343
     *                              Defines encoding used in conversion.
3344
     *                              </p>
3345
     *                              <p>
3346
     *                              For the purposes of this function, the encodings
3347
     *                              ISO-8859-1, ISO-8859-15,
3348
     *                              UTF-8, cp866,
3349
     *                              cp1251, cp1252, and
3350
     *                              KOI8-R are effectively equivalent, provided the
3351
     *                              <i>string</i> itself is valid for the encoding, as
3352
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3353
     *                              the same positions in all of these encodings.
3354
     *                              </p>
3355
     * @param bool   $double_encode [optional] <p>
3356
     *                              When <i>double_encode</i> is turned off PHP will not
3357
     *                              encode existing html entities, the default is to convert everything.
3358
     *                              </p>
3359
     *
3360
     * @psalm-pure
3361
     *
3362
     * @return string the converted string.
3363
     *                </p>
3364
     *                <p>
3365
     *                If the input <i>string</i> contains an invalid code unit
3366
     *                sequence within the given <i>encoding</i> an empty string
3367
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3368
     *                <b>ENT_SUBSTITUTE</b> flags are set
3369
     */
3370 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; every other name is normalized first.
        $is_passthrough_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_passthrough_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3387
3388
    /**
     * Reports whether the "iconv" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3402
3403
    /**
     * Deprecated alias that forwards to "UTF8::decimal_to_chr()".
     *
     * @param int|string $int
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3421
3422
    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * The hexadecimal digits are left-padded with zeros to at least four digits
     * and prepended with the given prefix.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text placed in front of the hexadecimal digits.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The prefix followed by the (zero-padded) hexadecimal digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // pad short values, e.g. "f1" => "00f1"; longer values stay as they are
        $padded_hex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $padded_hex;
    }
3444
3445
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function intlChar_loaded(): bool
    {
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3459
3460
    /**
     * Reports whether the "intl" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3474
3475
    /**
     * Deprecated alias for "UTF8::is_ascii()"; forwards to "ASCII::is_ascii()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3491
3492
    /**
     * Deprecated alias that forwards to "UTF8::is_base64()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3508
3509
    /**
     * Deprecated alias that forwards to "UTF8::is_binary()".
     *
     * @param int|string $str
     * @param bool       $strict
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, bool $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3526
3527
    /**
     * Deprecated alias that forwards to "UTF8::is_bom()".
     *
     * @param string $utf8_chr
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3543
3544
    /**
     * Deprecated alias that forwards to "UTF8::is_html()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3560
3561
    /**
     * Deprecated alias that forwards to "UTF8::is_json()".
     *
     * @param string $str
     *
     * @return bool
     *
     * @see        UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3575
3576
    /**
     * Deprecated alias that forwards to "UTF8::is_utf16()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see        UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }
3595
3596
    /**
     * Deprecated alias that forwards to "UTF8::is_utf32()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,
     *                   <strong>1</strong> for UTF-32LE,
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see        UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }
3615
3616
    /**
     * Deprecated alias that forwards to "UTF8::is_utf8()".
     *
     * @param string $str
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, bool $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3633
3634
    /**
     * Checks whether the string consists of alphabetic chars only.
     *
     * The check uses "mb_ereg_match()" when mbstring support is flagged as available,
     * otherwise it falls back to "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3653
3654
    /**
     * Checks whether the string consists of alphabetic and numeric chars only.
     *
     * The check uses "mb_ereg_match()" when mbstring support is flagged as available,
     * otherwise it falls back to "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3673
3674
    /**
     * Checks whether the string consists of punctuation chars only.
     *
     * The check is delegated to "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only punctuation chars.</p>
     */
    public static function is_punctuation(string $str): bool
    {
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3688
3689
    /**
     * Checks whether the string consists of printable (non-invisible) chars only.
     *
     * The string counts as printable when "UTF8::remove_invisible_characters()"
     * returns it unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
     */
    public static function is_printable(string $str): bool
    {
        $without_invisible_chars = self::remove_invisible_characters($str);

        return $without_invisible_chars === $str;
    }
3703
3704
    /**
     * Checks if a string is 7 bit ASCII, by delegating to "ASCII::is_ascii()".
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3723
3724
    /**
     * Checks whether the input is a base64 encoded string.
     *
     * The input is decoded in strict mode and encoded again; it only counts as
     * base64 when the round trip reproduces the input exactly.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str                   <p>The input string.</p>
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded.</p>
     */
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // anything that is not a string (e.g. null) can never be base64
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }
3755
3756
    /**
     * Check if the input is binary... (it looks like a hack).
     *
     * The checks run in this order and the first hit wins:
     * an empty input is never binary; an input made only of "0" and "1" is binary;
     * an input for which "UTF8::get_file_type()" reports the type "binary" is binary;
     * an input where more than 25% of the bytes are NUL bytes is binary;
     * in strict mode, an input for which finfo reports the encoding "binary" is binary.
     *
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * @param int|string $input
     * @param bool       $strict <p>Also ask finfo (ext-fileinfo) for the mime encoding.</p>
     *
     * @throws \RuntimeException if $strict is used but finfo support is flagged as unavailable
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // e.g. "0101"
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // ratio of NUL bytes over the whole input
        $byte_count = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0");
        if (($null_byte_count / $byte_count) > 0.25) {
            return true;
        }

        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detected_encoding === 'binary';
    }
3807
3808
    /**
     * Check if the file is binary.
     *
     * Up to the first 512 bytes of the file are read and passed to
     * "UTF8::is_binary()" in strict mode. A file that cannot be opened, cannot
     * be read or is empty is reported as not binary.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $first_bytes = \fread($handle, 512);
        \fclose($handle);

        if ($first_bytes === '' || $first_bytes === false) {
            return false;
        }

        return self::is_binary($first_bytes, true);
    }
3834
3835
    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
     *
     * The pattern used is "^[[:space:]]*$", so it also accepts zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        // Without mbstring the check is delegated to the internal pattern helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:space:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:space:]]*$', $str);
    }
3854
3855
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The input is compared strictly (===) against every key of the internal BOM table,
     * so the whole string has to be the BOM - a string that merely starts with a BOM does not match.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the $str is Byte Order Mark, <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // Only the keys are needed, so look them up by value instead of iterating the
        // shared static table by reference (which never got unset afterwards).
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3880
3881
    /**
     * Determine whether the value is considered to be empty.
     *
     * This follows the rules of PHP's empty(): every value that converts to boolean FALSE
     * is empty, e.g. "", "0", 0, 0.0 and [].
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // The parameter always exists here, so empty() reduces to a plain falsiness check.
        return !$str;
    }
3898
3899
    /**
     * Returns true if the string contains only hexadecimal chars, false otherwise.
     *
     * The pattern used is "^[[:xdigit:]]*$", so it also accepts zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        // Without mbstring the check is delegated to the internal pattern helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:xdigit:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:xdigit:]]*$', $str);
    }
3918
3919
    /**
     * Check if the string contains any HTML tags.
     *
     * The string is run through "UTF8::emoji_encode()" first and then searched for
     * an opening, closing or self-closing tag (optionally with attributes).
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        $found = [];
        \preg_match($tag_pattern, $encoded, $found);

        return $found !== [];
    }
3946
3947
    /**
     * Check if $url is a correct URL.
     *
     * Only "http://" and "https://" URLs are accepted. With $disallow_localhost the URL is additionally
     * rejected if it starts with a well-known loopback host (localhost, 127.0.0.1, ::1 with or without
     * brackets) or if ".localhost" occurs anywhere after the scheme.
     *
     * NOTE: the localhost check is a prefix / pattern check on the raw string only - no DNS resolution is
     * done, so other spellings of a loopback address are not detected here.
     *
     * @param string $url
     * @param bool   $disallow_localhost
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // IPv6 literals are written in brackets inside URLs, so the bracketed
                    // loopback form must be rejected as well.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Fallback for everything the pattern above does not cover.
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
4002
4003
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * The string is decoded via "UTF8::json_decode()" and is valid if the decoder reports no error.
     * A JSON "null" (also with surrounding whitespace) is therefore valid JSON, but - like every other
     * scalar - it is only accepted if $only_array_or_object_results_are_valid is false.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // Ask the decoder itself instead of comparing the raw input with "NULL":
        // a decoded null is only an error if json_last_error() says so.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if (\json_last_error() !== \JSON_ERROR_NONE) {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            return \is_object($decoded) || \is_array($decoded);
        }

        return true;
    }
4043
4044
    /**
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * The pattern used is "^[[:lower:]]*$", so it also accepts zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        // Without mbstring the check is delegated to the internal pattern helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:lower:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:lower:]]*$', $str);
    }
4061
4062
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done by actually unserializing the string. Objects are never instantiated while doing
     * so ("allowed_classes" is disabled), so the check is safe for serialized object payloads as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // serialize(false) unserializes to false, which is indistinguishable from a failure below.
        if ($str === 'b:0;') {
            return true;
        }

        // Warnings for invalid input are suppressed on purpose: a failure simply means "not serialized".
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4084
4085
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * The pattern used is "^[[:upper:]]*$", so it also accepts zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        // Without mbstring the check is delegated to the internal pattern helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:upper:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:upper:]]*$', $str);
    }
4105
4106
    /**
     * Check if the string is UTF-16.
     *
     * For both byte orders the (BOM-less) string is converted to UTF-8, back to UTF-16 and to UTF-8 again.
     * If that round trip is stable, a score is built from the characters of the converted text that are
     * found in the character statistic of the input. The byte order with the higher score wins; if both
     * scores are equal the string is not considered to be UTF-16.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>If true, the string must first pass "UTF8::is_binary()"
     *                                          (strict mode), otherwise <strong>false</strong> is returned.</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException may be thrown by "UTF8::is_binary()" in strict mode if ext-fileinfo is missing
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // --- little endian ---
        $maybe_utf16le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
            if ($test3 === $test) {
                /**
                 * @psalm-suppress RedundantCondition
                 */
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                // Iterate by value: the result of a function call is a temporary and
                // can not be used as a reference target.
                // NOTE(review): in_array() searches the *values* of $str_chars for a *key* of
                // count_chars($test3) - confirm this matches what count_chars() returns.
                /** @noinspection PhpUnusedLocalVariableInspection */
                foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf16le;
                    }
                }
            }
        }

        // --- big endian ---
        $maybe_utf16be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
            if ($test3 === $test) {
                // Only computed here if the little endian branch did not already do it.
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                /** @noinspection PhpUnusedLocalVariableInspection */
                foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf16be;
                    }
                }
            }
        }

        // Equal scores (including 0 / 0) mean that no byte order could be determined.
        if ($maybe_utf16be !== $maybe_utf16le) {
            if ($maybe_utf16le > $maybe_utf16be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4199
4200
    /**
     * Check if the string is UTF-32.
     *
     * For both byte orders the (BOM-less) string is converted to UTF-8, back to UTF-32 and to UTF-8 again.
     * If that round trip is stable, a score is built from the characters of the converted text that are
     * found in the character statistic of the input. The byte order with the higher score wins; if both
     * scores are equal the string is not considered to be UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>If true, the string must first pass "UTF8::is_binary()"
     *                                          (strict mode), otherwise <strong>false</strong> is returned.</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException may be thrown by "UTF8::is_binary()" in strict mode if ext-fileinfo is missing
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // --- little endian ---
        $maybe_utf32le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32LE');
            if ($test3 === $test) {
                /**
                 * @psalm-suppress RedundantCondition
                 */
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                // Iterate by value: the result of a function call is a temporary and
                // can not be used as a reference target.
                // NOTE(review): in_array() searches the *values* of $str_chars for a *key* of
                // count_chars($test3) - confirm this matches what count_chars() returns.
                /** @noinspection PhpUnusedLocalVariableInspection */
                foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32le;
                    }
                }
            }
        }

        // --- big endian ---
        $maybe_utf32be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32BE');
            if ($test3 === $test) {
                // Only computed here if the little endian branch did not already do it.
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                /** @noinspection PhpUnusedLocalVariableInspection */
                foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32be;
                    }
                }
            }
        }

        // Equal scores (including 0 / 0) mean that no byte order could be determined.
        if ($maybe_utf32be !== $maybe_utf32le) {
            if ($maybe_utf32le > $maybe_utf32be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4293
4294
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively); the check stops at the first element that
     * fails. An empty array is reported as valid. Every non-array input is cast to string.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // Read-only traversal: iterate by value, no reference is needed (or left dangling).
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4324
4325
    /**
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" before it is handed over to PHP's json_decode().
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, passed through unchanged to json_decode().
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4379
4380
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" before it is handed over to PHP's json_encode().
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4433
4434
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is done by testing for the existence of the json_decode() function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $is_available = \function_exists('json_decode');

        return $is_available;
    }
4448
4449
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without a language and without length preservation the native mb_* functions are used
     * directly; in every other case the work is delegated to "UTF8::strtolower()" (and "UTF8::substr()"
     * for other encodings).
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Any other encoding: normalize its name and use the encoding-aware helpers.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $first = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower($first, $encoding, false, $lang, $try_to_keep_the_string_length) . $rest;
        }

        $rest = (string) \mb_substr($str, 1);
        $first = (string) \mb_substr($str, 0, 1);

        // The native function is enough when neither a language nor length preservation is requested.
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first) . $rest;
        }

        return self::strtolower($first, $encoding, false, $lang, $try_to_keep_the_string_length) . $rest;
    }
4511
4512
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged and in the same order.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $forwarded_arguments = [$str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length];

        return self::lcfirst(...$forwarded_arguments);
    }
4543
4544
    /**
4545
     * Lowercase for all words in the string.
4546
     *
4547
     * @param string      $str                           <p>The input string.</p>
4548
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
4549
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
4550
     *                                                   not start a new word.</p>
4551
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
4552
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4553
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4554
     *                                                   tr</p>
4555
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4556
     *                                                   -> ß</p>
4557
     *
4558
     * @psalm-pure
4559
     *
4560
     * @return string
4561
     */
4562 2
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a loose "!$str" would also discard the valid input "0".
        if ($str === '') {
            return '';
        }

        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: the chunks are only read, so no reference is needed.
        foreach (self::str_to_words($str, $char_list) as $word) {
            // Skip only genuinely empty chunks; a chunk like "0" is real content and must be kept.
            if ((string) $word === '') {
                continue;
            }

            // Words listed in $exceptions (strict comparison) are copied through untouched.
            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4597
4598
    /**
4599
     * alias for "UTF8::lcfirst()"
4600
     *
4601
     * @param string      $str
4602
     * @param string      $encoding
4603
     * @param bool        $clean_utf8
4604
     * @param string|null $lang
4605
     * @param bool        $try_to_keep_the_string_length
4606
     *
4607
     * @psalm-pure
4608
     *
4609
     * @return string
4610
     *
4611
     * @see        UTF8::lcfirst()
4612
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
4613
     */
4614 5
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: delegates to lcfirst() with the arguments in the same order.
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4629
4630
    /**
4631
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
4632
     *
4633
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
4634
     *
4635
     * @param string      $str   <p>The string to be trimmed</p>
4636
     * @param string|null $chars <p>Optional characters to be stripped</p>
4637
     *
4638
     * @psalm-pure
4639
     *
4640
     * @return string the string with unwanted characters stripped from the left
4641
     */
4642 23
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list means "strip nothing" (same as PHP's native ltrim()).
        // Without this guard the pattern would contain the invalid class "[]" and
        // the failed regex call would turn the whole input into ''.
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // Only the PCRE fallback wraps the pattern in "/" delimiters (see regex_replace()),
            // so only that path needs "/" escaped as well.
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');

            // "{$var}" instead of "${var}": the latter form is deprecated since PHP 8.2.
            $pattern = "^[{$quoted_chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4670
4671
    /**
4672
     * Returns the UTF-8 character with the maximum code point in the given data.
4673
     *
4674
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
4675
     *
4676
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
4677
     *
4678
     * @psalm-pure
4679
     *
4680
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
4681
     */
4682 2
    public static function max($arg)
    {
        // An array of strings is joined first, so all characters compete together.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // No code points => nothing to compare.
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4697
4698
    /**
4699
     * Calculates and returns the maximum number of bytes taken by any
4700
     * UTF-8 encoded character in the given string.
4701
     *
4702
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
4703
     *
4704
     * @param string $str <p>The original Unicode string.</p>
4705
     *
4706
     * @psalm-pure
4707
     *
4708
     * @return int
4709
     *             <p>Max byte lengths of the given chars.</p>
4710
     */
4711 2
    public static function max_chr_width(string $str): int
    {
        $byte_lengths = self::chr_size_list($str);

        // An empty size list yields a width of 0.
        if ($byte_lengths === []) {
            return 0;
        }

        return (int) \max($byte_lengths);
    }
4720
4721
    /**
4722
     * Checks whether mbstring is available on the server.
4723
     *
4724
     * @psalm-pure
4725
     *
4726
     * @return bool
4727
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4728
     *
4729
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4730
     */
4731 28
    public static function mbstring_loaded(): bool
    {
        // Ask the engine directly whether the "mbstring" extension is loaded.
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4735
4736
    /**
4737
     * Returns the UTF-8 character with the minimum code point in the given data.
4738
     *
4739
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
4740
     *
4741
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
4742
     *
4743
     * @psalm-pure
4744
     *
4745
     * @return string|null
4746
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
4747
     */
4748 2
    public static function min($arg)
    {
        // An array of strings is joined first, so all characters compete together.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // No code points => nothing to compare.
        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4763
4764
    /**
4765
     * alias for "UTF8::normalize_encoding()"
4766
     *
4767
     * @param mixed $encoding
4768
     * @param mixed $fallback
4769
     *
4770
     * @psalm-pure
4771
     *
4772
     * @return mixed
4773
     *
4774
     * @see        UTF8::normalize_encoding()
4775
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
4776
     */
4777 2
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        // Deprecated camelCase alias of normalize_encoding().
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4781
4782
    /**
4783
     * Normalize the encoding-"name" input.
4784
     *
4785
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
4786
     *
4787
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4788
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4789
     *
4790
     * @psalm-pure
4791
     *
4792
     * @return mixed|string
4793
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
4794
     *
4795
     * @template TNormalizeEncodingFallback
4796
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
4797
     * @phpstan-return string|TNormalizeEncodingFallback
4798
     */
4799 339
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * Per-request memo: raw input name => normalized name.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // Loose check on purpose: both '' and '0' are falsy and yield the fallback.
        if (!$encoding) {
            return $fallback;
        }

        //
        // fast paths for the most common spellings (strict comparison, no cache lookup needed)
        //

        if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
            return 'UTF-8';
        }

        if ($encoding === '8BIT' || $encoding === 'BINARY') {
            return 'CP850';
        }

        if ($encoding === 'HTML' || $encoding === 'HTML-ENTITIES') {
            return 'HTML-ENTITIES';
        }

        if ($encoding === 'ISO' || $encoding === 'ISO-8859-1') {
            return 'ISO-8859-1';
        }

        // Only a fallback for non "strict_types" usage (e.g. a bool true cast to '1').
        // The former "=== '0'" comparison was unreachable: '0' is already handled
        // by the falsy check above, with the same result.
        if ($encoding === '1') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // an exact (case-sensitive) hit in the known list is returned as-is
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encoding_original = $encoding;
        $encoding = \strtoupper($encoding);
        // lookup key: upper-cased name with everything but letters and digits removed
        $encoding_upper_helper = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown names fall through as the upper-cased input.
        if (!empty($equivalences[$encoding_upper_helper])) {
            $encoding = $equivalences[$encoding_upper_helper];
        }

        // memoize under the caller's original spelling
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }
4947
4948
    /**
4949
     * Standardize line ending to unix-like.
4950
     *
4951
     * @param string          $str      <p>The input string.</p>
4952
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
4953
     *                                  here.</p>
4954
     *
4955
     * @psalm-pure
4956
     *
4957
     * @return string
4958
     *                <p>A string with normalized line ending.</p>
4959
     */
4960 5
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // Build a "line ending => replacement" map. For an array replacer the values
        // are paired positionally with ["\r\n", "\r", "\n"]; missing entries become ''
        // (the same pairing rule str_replace() uses for array arguments).
        if (\is_array($replacer)) {
            $replacements = \array_values($replacer);
            $map = [
                "\r\n" => (string) ($replacements[0] ?? ''),
                "\r"   => (string) ($replacements[1] ?? ''),
                "\n"   => (string) ($replacements[2] ?? ''),
            ];
        } else {
            $replacement = (string) $replacer;
            $map = [
                "\r\n" => $replacement,
                "\r"   => $replacement,
                "\n"   => $replacement,
            ];
        }

        // strtr() replaces in a single pass (longest key first) and never re-scans
        // text it has inserted. The previous sequential str_replace() did re-scan,
        // so with a "\r\n" replacer the input "a\r\nb" became "a\r\r\n\r\nb".
        return \strtr($str, $map);
    }
4964
4965
    /**
4966
     * Normalize some MS Word special characters.
4967
     *
4968
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
4969
     *
4970
     * @param string $str <p>The string to be normalized.</p>
4971
     *
4972
     * @psalm-pure
4973
     *
4974
     * @return string
4975
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
4976
     */
4977 10
    public static function normalize_msword(string $str): string
    {
        // Pure delegation: the ASCII helper class holds the actual implementation.
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4981
4982
    /**
4983
     * Normalize the whitespace.
4984
     *
4985
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
4986
     *
4987
     * @param string $str                        <p>The string to be normalized.</p>
4988
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
4989
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
4990
     *                                           bidirectional text chars.</p>
4991
     *
4992
     * @psalm-pure
4993
     *
4994
     * @return string
4995
     *                <p>A string with normalized whitespace.</p>
4996
     */
4997 61
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        // Pure delegation: the ASCII helper class holds the actual implementation.
        return ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);
    }
5008
5009
    /**
5010
     * Calculates Unicode code point of the given UTF-8 encoded character.
5011
     *
5012
     * INFO: opposite to UTF8::chr()
5013
     *
5014
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
5015
     *
5016
     * @param string $chr      <p>The character of which to calculate code point.</p>
5017
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5018
     *
5019
     * @psalm-pure
5020
     *
5021
     * @return int
5022
     *             <p>Unicode code point of the given character,<br>
5023
     *             0 on invalid UTF-8 byte sequence</p>
5024
     */
5025 27
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * Memo of already resolved lookups, keyed by "<char>_<encoding>".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        // the two literal names below skip the normalization call
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // still not UTF-8 after normalization => pass the character through self::encode()
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // lazy-load the static lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            // a falsy result (null / 0) falls through to the manual decoding below
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-indexed list of (at most) the first four byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or empty input => 0)
            $code = $lead_byte;
        }

        return $CHAR_CACHE[$cache_key] = $code;
    }
5103
5104
    /**
5105
     * Parses the string into an array (into the second parameter).
5106
     *
5107
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
5108
     *          if the second parameter is not set!
5109
     *
5110
     * EXAMPLE: <code>
5111
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
5112
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
5113
     * </code>
5114
     *
5115
     * @see http://php.net/manual/en/function.parse-str.php
5116
     *
5117
     * @param string $str        <p>The input string.</p>
5118
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
5119
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5120
     *
5121
     * @psalm-pure
5122
     *
5123
     * @return bool
5124
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
5125
     */
5126 2
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Without mbstring: use the native parser; success means "something was parsed".
        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);
        if ($parsed === false) {
            return false;
        }

        return $result !== [];
    }
5145
5146
    /**
5147
     * Checks if the "u" modifier is available that enables Unicode support in PCRE.
5148
     *
5149
     * @psalm-pure
5150
     *
5151
     * @return bool
5152
     *              <p>
5153
     *              <strong>true</strong> if support is available,<br>
5154
     *              <strong>false</strong> otherwise
5155
     *              </p>
5156
     */
5157
    public static function pcre_utf8_support(): bool
    {
        // Probe with an empty pattern that carries the "u" modifier; warnings are
        // silenced on purpose and the raw result is simply cast to bool.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe_result = @\preg_match('//u', '');

        return (bool) $probe_result;
    }
5162
5163
    /**
5164
     * Create an array containing a range of UTF-8 characters.
5165
     *
5166
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
5167
     *
5168
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
5169
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
5170
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
5171
     *                              "is_numeric"</p>
5172
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
5173
     * @param float|int  $step      [optional] <p>
5174
     *                              If a step value is given, it will be used as the
5175
     *                              increment between elements in the sequence. step
5176
     *                              should be given as a positive number. If not specified,
5177
     *                              step will default to 1.
5178
     *                              </p>
5179
     *
5180
     * @psalm-pure
5181
     *
5182
     * @return string[]
5183
     */
5184 2
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        // falsy boundaries ('' / 0 / '0' / null) produce an empty range
        if (!$var1 || !$var2) {
            return [];
        }

        // validate a caller-supplied step; the default (int 1) needs no checks
        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // ctype_*() must always receive strings: an int argument is interpreted as an
        // ASCII code (and is deprecated since PHP 8.1), so e.g. int 10 would not be
        // recognized as the hex string "10". Both checks therefore cast explicitly.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // anything else is treated as a character
            $start = self::ord((string) $var1);
        }

        if (!$start) {
            return [];
        }

        // the end value is resolved with the same interpretation as the start value
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord((string) $var2);
        }

        if (!$end) {
            return [];
        }

        // PHP's native range() also handles descending sequences ($start > $end)
        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5257
5258
    /**
5259
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
5260
     *
5261
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
5262
     *
5263
     * e.g:
5264
     * 'test+test'                     => 'test+test'
5265
     * 'D&#252;sseldorf'               => 'Düsseldorf'
5266
     * 'D%FCsseldorf'                  => 'Düsseldorf'
5267
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
5268
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
5269
     * 'Düsseldorf'                   => 'Düsseldorf'
5270
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
5271
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
5272
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
5273
     *
5274
     * @param string $str          <p>The input string.</p>
5275
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
5276
     *
5277
     * @psalm-pure
5278
     *
5279
     * @return string
5280
     *                <p>The decoded URL, as a string.</p>
5281
     */
5282 7
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Only strings containing one of these markers go through the decoding steps.
        $needs_decoding = \strpos($str, '&') !== false
                          ||
                          \strpos($str, '%') !== false
                          ||
                          \strpos($str, '+') !== false
                          ||
                          \strpos($str, '\u') !== false;

        if (!$needs_decoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // Always decode once; with $multi_decode keep going until the string stops changing.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entity_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($entity_decoded));
        } while ($multi_decode && $previous !== $str);

        return $str;
    }
5334
5335
    /**
5336
     * Replaces all occurrences of $pattern in $str by $replacement.
5337
     *
5338
     * @param string $str         <p>The input string.</p>
5339
     * @param string $pattern     <p>The regular expression pattern.</p>
5340
     * @param string $replacement <p>The string to replace with.</p>
5341
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
5342
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
5343
     *
5344
     * @psalm-pure
5345
     *
5346
     * @return string
5347
     */
5348 18
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string 'msr' is translated to 'ms' before it is handed to PCRE.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: a falsy delimiter ('' or '0') is replaced by '/'
        $wrapper = $delimiter ?: '/';

        // the "u" modifier is always added
        $regex = $wrapper . $pattern . $wrapper . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5370
5371
    /**
5372
     * alias for "UTF8::remove_bom()"
5373
     *
5374
     * @param string $str
5375
     *
5376
     * @psalm-pure
5377
     *
5378
     * @return string
5379
     *
5380
     * @see        UTF8::remove_bom()
5381
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
5382
     */
5383 1
    public static function removeBOM(string $str): string
    {
        // Deprecated camelCase alias of remove_bom().
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5387
5388
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the internal BOM table (BOM bytes => byte length) is tested once,
     * in table order, against the start of the (possibly already shortened) string.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $bytes_left = \strlen($str);

        foreach (self::$BOM as $bom => $bom_size) {
            // skip BOMs the string does not start with
            if (\strncmp($str, $bom, $bom_size) !== 0) {
                continue;
            }

            /** @var false|string $remainder - needed for PhpStan (stubs error) */
            $remainder = \substr($str, $bom_size, $bytes_left);
            if ($remainder === false) {
                return '';
            }

            $bytes_left -= (int) $bom_size;
            $str = (string) $remainder;
        }

        return $str;
    }
5423
5424
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "abab" for "ab") is collapsed into a single
     * occurrence. If $what is an array, the items are processed one after another.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                // Use the captured group as replacement instead of the raw item: the raw item
                // would be parsed for back-references, so items such as "$0" or "\0" would be
                // replaced by the whole match and the duplicates would survive.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
5454
5455
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, in the format
     *                               expected by "strip_tags()". Default: '' (strip everything)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5472
5473
    /**
     * Remove all breaks [<br> | \r\n | \r | \n] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     * "<br>" tags are matched case-insensitively, with or without attributes / a closing slash.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - no stray "/" in front of "\r\n" (the delimiter is "#"), otherwise CRLF is never matched
        //   as a unit and a literal "/" before a CRLF gets removed
        // - "\b" after "br" so that other tags which only start with "br" are left alone
        return (string) \preg_replace("#\r\n|\r|\n|<br\\b[^>]*>#i", $replacement, $str);
    }
5488
5489
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * The work is delegated to "ASCII::remove_invisible_characters()" with the same arguments.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>
     *                            Try to remove url encoded control character.
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                            <br>
     *                            Default: false
     *                            </p>
     * @param string $replacement [optional] <p>The replacement character.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        $visible_only = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $visible_only;
    }
5523
5524
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix test is done on bytes; the cut itself is done with character counts
     * in the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' (not truthiness) so that the prefix "0" is handled as well;
        // "strncmp()" only looks at the first bytes instead of scanning the whole string.
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5565
5566
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix test is done on bytes; the cut itself is done with character counts
     * in the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' (not truthiness) so that the suffix "0" is handled as well.
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5604
5605
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Case-sensitive replacement uses "str_replace()", the case-insensitive variant
     * is forwarded to "UTF8::str_ireplace()".
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5630
5631
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * Case-sensitive replacement uses "str_replace()", the case-insensitive variant
     * is forwarded to "UTF8::str_ireplace()".
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5656
5657
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                // Let mbstring mark invalid sequences with U+FFFD and let the final "str_replace()"
                // turn the marker into the replacement. Passing "\ord($replacement_char)" here
                // would only use the first byte, i.e. the wrong code point for multi-byte chars.
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
                @\mb_substitute_character(0xFFFD);

                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global substitute character
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (�)
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5716
5717
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. With null, whitespace ("\s") is stripped;
     *                           with an empty string, nothing is stripped.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // an empty char list would build the invalid character class "[]"
        // (the regex call fails and the result would collapse to '')
        if ($chars === '') {
            return $str;
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        // "\z" anchors at the very end of the string; "$" would also match before a final newline
        if ($chars !== null) {
            // "mb_ereg_replace()" uses no delimiter, the PCRE fallback uses "/"
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = '[' . $quoted_chars . ']+\\z';
        } else {
            $pattern = '[\\s]+\\z';
        }

        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5758
5759
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the internal support table.
     * The markup is always returned and, if $useEcho is true, echoed as well.
     *
     * @param bool $useEcho <p>Whether the markup is also written to the output. Default: true</p>
     *
     * @return string
     *                <p>The generated HTML markup.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';

        // read-only loop: iterate by value, no reference into the shared static array is needed
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5786
5787
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character; an empty string for empty input.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched when requested
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5820
5821
    /**
     * Replaces every run of $tab_length spaces in the string with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with the space runs replaced by "\t".</p>
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // ready-made strings for the common widths, everything else is built on demand
        $known_tabs = [
            4 => '    ',
            2 => '  ',
        ];

        $spaces = $known_tabs[$tab_length] ?? \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5841
5842
    /**
     * Deprecated alias of "UTF8::str_split()": all arguments are forwarded unchanged.
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5864
5865
    /**
     * Deprecated alias of "UTF8::str_starts_with()": both arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5882
5883
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the plain "mb_" functions are enough as long as no language / length handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-cases the character that follows a run of separators; the separators are dropped.
         *
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        $upper_after_separator = static function (array $match) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!isset($match[1])) {
                return '';
            }

            if (!$use_mb_functions) {
                return self::strtoupper($match[1], $encoding, false, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($match[1]);
            }

            return \mb_strtoupper($match[1], $encoding);
        };

        /**
         * Upper-cases a run of numbers together with the character that follows it.
         *
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        $upper_number_run = static function (array $match) use ($use_mb_functions, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($match[0], $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($match[0]);
            }

            return \mb_strtoupper($match[0], $encoding);
        };

        $str = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        // drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        $str = (string) \preg_replace_callback('/[-_\\s]+(.)?/u', $upper_after_separator, $str);

        return (string) \preg_replace_callback('/[\\p{N}]+(.)?/u', $upper_number_run, $str);
    }
5976
5977
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the capitalize-helper is applied
     * with ' ' and afterwards with '-' as word separator.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);

        $capitalized_by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($capitalized_by_space, '-');
    }
5998
5999
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * The case-sensitive check uses the native "str_contains()" on PHP >= 8.0 and "strpos()" before that;
     * the case-insensitive check uses "mb_stripos()".
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        if (\PHP_VERSION_ID >= 80000) {
            return \str_contains($haystack, $needle);
        }

        return \strpos($haystack, $needle) !== false;
    }
6028
6029
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // "0" is a real needle; every other falsy value counts as an empty needle
            if ($needle !== '0' && !$needle) {
                return false;
            }

            if ($case_sensitive) {
                if (\strpos($haystack, $needle) === false) {
                    return false;
                }

                // do not fall through into the case-insensitive check
                continue;
            }

            if (\mb_stripos($haystack, $needle) === false) {
                return false;
            }
        }

        return true;
    }
6068
6069
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack or an empty $needles list results in false; empty needles are skipped.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains at least one of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // "0" is a real needle; every other falsy value counts as an empty needle
            if ($needle !== '0' && !$needle) {
                continue;
            }

            if ($case_sensitive) {
                $found = \strpos($haystack, $needle) !== false;
            } else {
                $found = \mb_stripos($haystack, $needle) !== false;
            }

            if ($found) {
                return true;
            }
        }

        return false;
    }
6113
6114
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "UTF8::str_delimit()" with '-' as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
6130
6131
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // both regex steps use "mb_ereg_replace()" with mbstring support and "preg_replace()" without it
        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        $trimmed = \trim($str);

        // step 1: put a "-" marker in front of uppercase letters inside a word
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lowercase the string (before the delimiter is inserted)
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: collapse every run of dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6184
6185
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary data (UTF-32 / UTF-16), plain ASCII,
     * UTF-8, "mb_detect_encoding()" with a fixed candidate list and finally an
     * "iconv()" round-trip over the known encodings.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true)) {
            // result "1" means little-endian, "2" means big-endian
            $utf32_result = self::is_utf32($str, false);
            if ($utf32_result === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_result === 2) {
                return 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($str, false);
            if ($utf16_result === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_result === 2) {
                return 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) pure ASCII
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) valid UTF-8
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) ask "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) try an "iconv()" round-trip for every known encoding
        //

        if (self::$ENCODINGS === null) {
            // the list is loaded on first use and kept for later calls
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
6315
6316
    /**
     * Deprecated alias: forwards both arguments to "UTF8::str_ends_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_ends_with()
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
6333
6334
    /**
     * Check if the string ends with the given substring (case-sensitive, byte-wise).
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // an empty needle counts as a suffix of every string
        if ($needle === '') {
            return true;
        }

        // a non-empty needle can never be found in an empty haystack
        if ($haystack === '') {
            return false;
        }

        // use the native function on PHP 8+, otherwise compare the trailing bytes
        return \PHP_VERSION_ID >= 80000
            ? \str_ends_with($haystack, $needle)
            : \substr($haystack, -\strlen($needle)) === $needle;
    }
6365
6366
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring is treated as a match, like in "UTF8::str_ends_with()"
     * - an empty $substrings array never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // iterate by value: nothing is modified, so no reference is needed
        foreach ($substrings as $substring) {
            // "-strlen('')" is 0 and would select the whole string instead of an
            // empty tail, so the empty substring is handled explicitly
            if (
                $substring === ''
                ||
                \substr($str, -\strlen($substring)) === $substring
            ) {
                return true;
            }
        }

        return false;
    }
6393
6394
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * The comparison is case-sensitive and byte-wise.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Only the first strlen($substring) bytes are compared; a full
        // "strpos()" search would scan the whole string when the prefix is missing.
        if (
            $substring !== ''
            &&
            \strncmp($str, $substring, \strlen($substring)) === 0
        ) {
            return $str;
        }

        // prefix missing (or empty substring, where this is a no-op)
        return $substring . $str;
    }
6417
6418
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * The comparison is case-sensitive and byte-wise.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $already_present = $str !== ''
                           && $substring !== ''
                           && \substr($str, -\strlen($substring)) === $substring;

        if ($already_present) {
            return $str;
        }

        // suffix missing, or one of the inputs is empty (plain concatenation)
        return $str . $substring;
    }
6442
6443
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * Every occurrence of '_id' is removed first, then the remaining
     * underscores become spaces; the result is trimmed before "UTF8::ucfirst()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // search => replacement, applied in this order
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $str = \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        return self::ucfirst(\trim($str));
    }
6469
6470
    /**
     * Deprecated alias: forwards both arguments to "UTF8::str_istarts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
6487
6488
    /**
     * Deprecated alias: forwards both arguments to "UTF8::str_iends_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
6505
6506
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * The tail of $haystack is cut with byte-based "substr()" / "strlen()" and
     * then compared with "UTF8::strcasecmp()".
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // an empty needle counts as a suffix of every string
        if ($needle === '') {
            return true;
        }

        // a non-empty needle can never be found in an empty haystack
        if ($haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6533
6534
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $substrings array never matches
     *
     * Each candidate is checked with "UTF8::str_iends_with()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // iterate by value: the candidates are only read, never modified
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6561
6562
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Deprecated alias: all arguments are forwarded to "UTF8::stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
6593
6594
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Deprecated alias: all arguments are forwarded to "UTF8::strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
6626
6627
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Deprecated alias: all arguments are forwarded to "UTF8::strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
6658
6659
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Deprecated alias: all arguments are forwarded to "UTF8::strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
6691
6692
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The input is returned unchanged when $index is greater than the
     * length of the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // fast path: plain "mb_*" calls for UTF-8
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        // every other encoding goes through the class' own helpers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
6733
6734
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * The search values are turned into quoted, case-insensitive UTF-8 regex
     * patterns and applied via "preg_replace()". The replacement is always
     * inserted literally: "$" and "\" have no special meaning.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     An empty search string never matches.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        $search = (array) $search;

        /** @noinspection AlterInForeachInspection */
        foreach ($search as &$s) {
            $s = (string) $s;
            if ($s === '') {
                // pattern that can never match, so an empty needle changes nothing
                $s = '/^(?<=.)$/';
            } else {
                $s = '/' . \preg_quote($s, '/') . '/ui';
            }
        }
        // break the reference so a later use of "$s" cannot alter the last pattern
        unset($s);

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // "preg_replace()" would expand "$1", "\1" or "\\" inside the replacement;
        // escape "\" and "$" so the text is inserted as-is
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replacement)
            ? \array_map($escape, $replacement)
            : $escape($replacement);

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($search, $replacement, $subject, -1, $count);

        return $subject;
    }
6801
6802
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The prefix is compared with byte-wise "strncasecmp()". An empty $search
     * results in $replacement being appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // nothing to look for: plain concatenation (also covers an empty $str)
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_length = \strlen($search);
        $has_prefix = \strncasecmp($str, $search, $prefix_length) === 0;

        if (!$has_prefix) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_length);
    }
6837
6838
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The suffix is searched with byte-wise "stripos()". An empty $search
     * results in $replacement being appended to $str. When $search is longer
     * than $str nothing can match and $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // nothing to look for: plain concatenation (also covers an empty $str)
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);
        $offset = \strlen($str) - $searchLength;

        // A negative offset would be handed to "stripos()" as "count from the end"
        // and is rejected (ValueError on PHP 8, warning before) once it points
        // outside of the string, so bail out early.
        if ($offset < 0) {
            return $str;
        }

        // searching from $offset leaves exactly strlen($search) bytes, so a hit
        // can only be the suffix itself
        if (\stripos($str, $search, $offset) !== false) {
            $str = \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6872
6873
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * The check passes when "UTF8::stripos()" reports the needle at index 0.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // an empty needle counts as a prefix of every string
        if ($needle === '') {
            return true;
        }

        // a non-empty needle can never be found in an empty haystack
        if ($haystack === '') {
            return false;
        }

        $first_position = self::stripos($haystack, $needle);

        return $first_position === 0;
    }
6900
6901
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty $substrings array never matches
     *
     * Each candidate is checked with "UTF8::str_istarts_with()".
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: the candidates are only read, never modified
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6932
6933
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * The separator is searched case-insensitively. An empty string is
     * returned when $str or $separator is empty, or the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // search with the same encoding that is used for cutting below,
        // otherwise the offset may not line up with the substring call
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6972
6973
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * The separator is searched case-insensitively. An empty string is
     * returned when $str or $separator is empty, or the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::strripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // search with the same encoding that is used for cutting below,
        // otherwise the offset may not line up with the substring call
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7012
7013
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * The separator is searched case-insensitively. An empty string is
     * returned when $str or $separator is empty, or the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr($str, 0, $offset);
        }

        // search with the same encoding that is used for cutting below,
        // otherwise the offset may not line up with the substring call
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7044
7045
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * The separator is searched case-insensitively. An empty string is
     * returned when $str or $separator is empty, or the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // non-UTF-8 input goes through the class' own helpers
        if ($encoding !== 'UTF-8') {
            $last_position = self::strripos($str, $separator, 0, $encoding);

            return $last_position === false
                ? ''
                : (string) self::substr($str, 0, $last_position, $encoding);
        }

        // UTF-8: plain "mb_*" calls
        $last_position = \mb_strripos($str, $separator);

        return $last_position === false
            ? ''
            : (string) \mb_substr($str, 0, $last_position);
    }
7081
7082
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * The lookup is delegated to "UTF8::stristr()"; an empty string is
     * returned when $str or $needle is empty, or nothing was found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        // "false" signals "not found" and is mapped to an empty string
        return $found === false ? '' : $found;
    }
7120
7121
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle",
     * as located by "self::strrichr()".
     *
     * An empty string is returned if "$str" or "$needle" is empty, or if the lookup fails.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        // "false" signals "no match"; callers of this method always get a string.
        return $found === false ? '' : $found;
    }
7159
7160
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string is returned if "$str" is empty or "$n" is not positive.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized_encoding);
        }

        // A negative start offset counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
7188
7189
    /**
     * Limit the number of characters in a string.
     *
     * If "$str" is longer than "$length", it is cut so that the kept part plus "$str_add_on"
     * is "$length" characters long. If "$str_add_on" alone is already longer than "$length",
     * nothing of "$str" is kept and only "$str_add_on" is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The (possibly shortened) string; empty if "$str" is empty or "$length" is not positive.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // A negative length would make mb_substr() cut from the END of the string,
            // returning more than the limit allows, so the kept length is clamped to zero.
            $keep = $length - (int) \mb_strlen($str_add_on);
            if ($keep < 0) {
                $keep = 0;
            }

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string itself.
        $keep = $length - (int) self::strlen($str_add_on, $encoding);
        if ($keep < 0) {
            $keep = 0;
        }

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7228
7229
    /**
     * Limit the number of characters in a string, cutting at a word boundary (a single space) where possible.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        // The 'UTF-8' fast path uses mbstring directly, everything else goes through the class helpers.
        $is_utf8 = $encoding === 'UTF-8';

        $total_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // Short enough already: return untouched (no add-on).
        if ($total_length <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: drop that space and stop there.
        $boundary_char = $is_utf8
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);

        if ($boundary_char === ' ') {
            $head = $is_utf8
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        $truncated = $is_utf8
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);

        if ($truncated === false) {
            return '' . $str_add_on;
        }

        // A negative limit makes explode() drop the last (possibly cut-off) word.
        $complete_words = \implode(' ', \explode(' ', $truncated, -1));

        if ($complete_words === '') {
            // No usable word boundary: fall back to a hard cut one character before the limit.
            $head = $is_utf8
                ? \mb_substr($truncated, 0, $length - 1)
                : self::substr($truncated, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        return $complete_words . $str_add_on;
    }
7297
7298
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * The strings are compared character by character from the start; the comparison stops
     * at the first difference or at the end of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );

            $pos = 0;
            while ($pos < $limit) {
                $left = \mb_substr($str1, $pos, 1);

                if ($left === false || $left !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $left;
                ++$pos;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $pos = 0;
        while ($pos < $limit) {
            $left = self::substr($str1, $pos, 1, $encoding);

            if ($left === false || $left !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $left;
            ++$pos;
        }

        return $prefix;
    }
7361
7362
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated AFTER normalization on purpose: an alias that normalizes to 'UTF-8'
        // uses the mbstring fast path for the character access below.
        $use_mb = $encoding === 'UTF-8';

        // Extract every character exactly once instead of once per table cell.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $use_mb
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        // $len = length of the best match so far, $end = 1-based end position of it in $str1.
        $len = 0;
        $end = 0;

        // Each row only depends on the previous one, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $str_chars[$i - 1];
            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    $current_row[$j] = $run;

                    // Strictly greater: on ties the earliest match in $str1 wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7452
7453
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * The strings are compared character by character from the end; the comparison stops
     * at the first difference or at the start of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // $back counts characters from the end of both strings (1 = last character).
            $back = 1;
            while ($back <= $limit) {
                $right = \mb_substr($str1, -$back, 1);

                if ($right === false || $right !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $right . $suffix;
                ++$back;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $back = 1;
        while ($back <= $limit) {
            $right = self::substr($str1, -$back, 1, $encoding);

            if ($right === false || $right !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $right . $suffix;
            ++$back;
        }

        return $suffix;
    }
7519
7520
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u" (UTF-8) modifier,
     * so it must be given without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
7535
7536
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Negative offsets count from the end: -1 is the last character, -$char_count the first.
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
7561
7562
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check must measure the string in the same encoding that is used to read the character.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7594
7595
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * The input is returned unchanged if "$pad_length" is 0, "$pad_string" is empty,
     * or the input is already longer than "$pad_length".
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if "$pad_type" is neither an int nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // The 'UTF-8' fast path uses mbstring directly, everything else goes through the class helpers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // Already longer than requested: nothing to pad.
        if ($pad_length < $str_length) {
            return $str;
        }

        $ps_length = $is_utf8
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);

        // A pad string that measures as zero characters cannot fill anything (and would divide by zero below).
        if ($ps_length < 1) {
            return $str;
        }

        // Builds exactly $char_count characters of padding by repeating $pad_string
        // just often enough and cutting off the overhang.
        $build_padding = static function (int $char_count) use ($pad_string, $ps_length, $is_utf8, $encoding): string {
            if ($char_count <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($char_count / $ps_length));

            return $is_utf8
                ? (string) \mb_substr($repeated, 0, $char_count)
                : (string) self::substr($repeated, 0, $char_count, $encoding);
        };

        $diff = $pad_length - $str_length;

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                return $build_padding($diff) . $str;

            case \STR_PAD_BOTH:
                // With an odd difference the extra character goes to the right side.
                return $build_padding((int) \floor($diff / 2))
                       . $str
                       . $build_padding((int) \ceil($diff / 2));

            case \STR_PAD_RIGHT:
            default:
                return $str . $build_padding($diff);
        }
    }
7762
7763
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7791
7792
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7820
7821
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7849
7850
    /**
     * Repeat a string.
     *
     * The input is passed through "self::filter()" before it is handed to the native "str_repeat()".
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7879
7880
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $result;
         */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
7939
7940
    /**
7941
     * Replaces $search from the beginning of string with $replacement.
7942
     *
7943
     * @param string $str         <p>The input string.</p>
7944
     * @param string $search      <p>The string to search for.</p>
7945
     * @param string $replacement <p>The replacement.</p>
7946
     *
7947
     * @psalm-pure
7948
     *
7949
     * @return string
7950
     *                <p>A string after the replacements.</p>
7951
     */
7952 17
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        $search_is_empty = $search === '';

        // Empty input: with an empty replacement or an empty search term the
        // result is simply the replacement (which may itself be empty).
        if ($str === '' && ($replacement === '' || $search_is_empty)) {
            return $replacement;
        }

        // An empty search term appends the replacement to the input.
        if ($search_is_empty) {
            return $str . $replacement;
        }

        // Byte-wise prefix comparison.
        $prefix_length = \strlen($search);
        if (\substr($str, 0, $prefix_length) !== $search) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_length);
    }
7978
7979
    /**
7980
     * Replaces $search from the ending of string with $replacement.
7981
     *
7982
     * @param string $str         <p>The input string.</p>
7983
     * @param string $search      <p>The string to search for.</p>
7984
     * @param string $replacement <p>The replacement.</p>
7985
     *
7986
     * @psalm-pure
7987
     *
7988
     * @return string
7989
     *                <p>A string after the replacements.</p>
7990
     */
7991 17
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // An empty search term appends the replacement to the input.
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // A search term longer than the input can never be its suffix.
        // Checking the lengths first also avoids handing an out-of-range
        // negative offset to a string function (ValueError on PHP >= 8.0,
        // warning on older versions).
        if ($searchLength > \strlen($str)) {
            return $str;
        }

        // Byte-wise suffix comparison.
        if (\substr($str, -$searchLength) === $search) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
8016
8017
    /**
8018
     * Replace the first "$search"-term with the "$replace"-term.
8019
     *
8020
     * @param string $search
8021
     * @param string $replace
8022
     * @param string $subject
8023
     *
8024
     * @psalm-pure
8025
     *
8026
     * @return string
8027
     *
8028
     * @psalm-suppress InvalidReturnType
8029
     */
8030 2
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_match_at = self::strpos($subject, $search);

        // No occurrence: hand the subject back untouched.
        if ($first_match_at === false) {
            return $subject;
        }

        // Swap exactly the matched span for the replacement.
        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $first_match_at,
            (int) self::strlen($search)
        );
    }
8051
8052
    /**
8053
     * Replace the last "$search"-term with the "$replace"-term.
8054
     *
8055
     * @param string $search
8056
     * @param string $replace
8057
     * @param string $subject
8058
     *
8059
     * @psalm-pure
8060
     *
8061
     * @return string
8062
     *
8063
     * @psalm-suppress InvalidReturnType
8064
     */
8065 2
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_match_at = self::strrpos($subject, $search);

        // No occurrence: hand the subject back untouched.
        if ($last_match_at === false) {
            return $subject;
        }

        // Swap exactly the matched span for the replacement.
        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $last_match_at,
            (int) self::strlen($search)
        );
    }
8085
8086
    /**
8087
     * Shuffles all the characters in the string.
8088
     *
8089
     * INFO: uses random algorithm which is weak for cryptography purposes
8090
     *
8091
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
8092
     *
8093
     * @param string $str      <p>The input string</p>
8094
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8095
     *
8096
     * @return string
8097
     *                <p>The shuffled string.</p>
8098
     */
8099 5
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // The exact default takes the direct "mb_*" route; anything else is
        // normalized and routed through the class' own helpers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // Nothing to shuffle. Returning here also avoids building
        // range(0, -1), which yields the bogus index list [0, -1].
        if ($char_count <= 0) {
            return '';
        }

        $indexes = \range(0, $char_count - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // init
        $shuffled_str = '';

        // Rebuild the string character by character in the shuffled order.
        foreach ($indexes as $index) {
            $tmp_sub_str = $is_utf8
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding);

            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
8135
8136
    /**
8137
     * Returns the substring beginning at $start, and up to, but not including
8138
     * the index specified by $end. If $end is omitted, the function extracts
8139
     * the remaining string. If $end is negative, it is computed from the end
8140
     * of the string.
8141
     *
8142
     * @param string   $str
8143
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
8144
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
8145
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8146
     *
8147
     * @psalm-pure
8148
     *
8149
     * @return false|string
8150
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
8151
     *                      characters long, <b>FALSE</b> will be returned.</p>
8152
     */
8153 18
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The exact default takes the direct "mb_*" route; anything else is
        // normalized and routed through the class' own helpers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // A non-negative end at or before the start is an empty slice.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            $str_length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            if ($end === null) {
                // No end given: take everything from $start onwards.
                $length = $str_length;
            } else {
                // A negative end counts from the end of the string. If it
                // resolves to a position before $start the slice is empty:
                // clamp to 0, because a negative length would be read by the
                // substr functions as "stop N characters before the end"
                // and return unrelated characters.
                $length = \max(0, $str_length + $end - $start);
            }
        } else {
            $length = $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
8187
8188
    /**
8189
     * Convert a string to e.g.: "snake_case"
8190
     *
8191
     * @param string $str
8192
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8193
     *
8194
     * @psalm-pure
8195
     *
8196
     * @return string
8197
     *                <p>A string in snake_case.</p>
8198
     */
8199 22
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Hyphens are treated like underscores from the start.
        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Prefix every number / upper-case letter with "_" (numbers are also
        // suffixed); surplus underscores are collapsed further down.
        //
        // The character class must not contain "|": inside [...] it is a
        // literal pipe, not an alternation, so pipe characters in the input
        // would be matched and prefixed with "_" as well.
        $str = (string) \preg_replace_callback(
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // ASCII digit: surround it with underscores.
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8257
8258
    /**
8259
     * Sort all characters according to code points.
8260
     *
8261
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
8262
     *
8263
     * @param string $str    <p>A UTF-8 string.</p>
8264
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
8265
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
8266
     *
8267
     * @psalm-pure
8268
     *
8269
     * @return string
8270
     *                <p>A string of sorted characters.</p>
8271
     */
8272 2
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        // Flipping twice collapses duplicate values.
        if ($unique) {
            $code_points = \array_flip(\array_flip($code_points));
        }

        // Order by value; both sort functions keep the keys.
        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8288
8289
    /**
8290
     * Convert a string to an array of Unicode characters.
8291
     *
8292
     * EXAMPLE: <code>
8293
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
8294
     * </code>
8295
     *
8296
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
8297
     * @param int            $length                  [optional] <p>Max character length of each array
8298
     *                                                element.</p>
8299
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
8300
     *                                                string.</p>
8301
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
8302
     *                                                "mb_substr"</p>
8303
     *
8304
     * @psalm-pure
8305
     *
8306
     * @return string[][]
8307
     *                    <p>An array containing chunks of the input.</p>
8308
     */
8309 1
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split every entry on its own, keeping the original keys and order.
        $chunks_per_entry = [];
        foreach ($input as $key => $entry) {
            $chunks_per_entry[$key] = self::str_split(
                $entry,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        /** @var string[][] $chunks_per_entry */
        return $chunks_per_entry;
    }
8327
8328
    /**
8329
     * Convert a string to an array of unicode characters.
8330
     *
8331
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
8332
     *
8333
     * @param int|string $input                   <p>The string or int to split into array.</p>
8334
     * @param int        $length                  [optional] <p>Max character length of each array
8335
     *                                            element.</p>
8336
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
8337
     *                                            string.</p>
8338
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
8339
     *                                            "mb_substr"</p>
8340
     *
8341
     * @psalm-pure
8342
     *
8343
     * @return string[]
8344
     *                  <p>An array containing chunks of chars from the input.</p>
8345
     *
8346
     * @noinspection SuspiciousBinaryOperationInspection
8347
     * @noinspection OffsetOperationsInspection
8348
     */
8349 90
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // A non-positive chunk length cannot produce any chunk.
        if ($length <= 0) {
            return [];
        }

        // Old fallback: arrays are delegated to the array variant.
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        $text = (string) $input;
        if ($text === '') {
            return [];
        }

        if ($clean_utf8) {
            $text = self::clean($text);
        }

        $use_mbstring = $try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true;

        if ($use_mbstring) {
            // Preferred route: the native splitter already honours $length.
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $native_chunks = \mb_str_split($text, $length);
                if ($native_chunks !== false) {
                    return $native_chunks;
                }
            }

            $char_count = \mb_strlen($text);
            if ($char_count > 127) {
                // Longer strings: one regex pass instead of per-char substr calls.
                $matches = [];
                \preg_match_all('/./us', $text, $matches);
                $chars = $matches[0] ?? [];
            } else {
                $chars = [];
                for ($offset = 0; $offset < $char_count; ++$offset) {
                    $chars[] = \mb_substr($text, $offset, 1);
                }
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $text, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // Fallback: walk the bytes and group them by the length announced
            // in the lead byte. A lead byte without the expected continuation
            // bytes contributes nothing to the result.
            $chars = [];
            $byte_count = \strlen($text);

            for ($pos = 0; $pos < $byte_count; ++$pos) {
                $lead = $text[$pos];

                // 1 byte: 0xxxxxxx
                if (($lead & "\x80") === "\x00") {
                    $chars[] = $lead;

                    continue;
                }

                // 2 bytes: 110xxxxx 10xxxxxx
                if (isset($text[$pos + 1]) && ($lead & "\xE0") === "\xC0") {
                    if (($text[$pos + 1] & "\xC0") === "\x80") {
                        $chars[] = $lead . $text[$pos + 1];

                        ++$pos;
                    }

                    continue;
                }

                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                if (isset($text[$pos + 2]) && ($lead & "\xF0") === "\xE0") {
                    if (
                        ($text[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $text[$pos + 1] . $text[$pos + 2];

                        $pos += 2;
                    }

                    continue;
                }

                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                if (isset($text[$pos + 3]) && ($lead & "\xF8") === "\xF0") {
                    if (
                        ($text[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 2] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $text[$pos + 1] . $text[$pos + 2] . $text[$pos + 3];

                        $pos += 3;
                    }
                }
            }
        }

        // Glue the single characters back together into chunks of $length.
        if ($length > 1) {
            return \array_map(
                static function (array $chunk): string {
                    return \implode('', $chunk);
                },
                \array_chunk($chars, $length)
            );
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
8488
8489
    /**
8490
     * Splits the string with the provided regular expression, returning an
8491
     * array of strings. An optional integer $limit will truncate the
8492
     * results.
8493
     *
8494
     * @param string $str
8495
     * @param string $pattern <p>The regex with which to split the string.</p>
8496
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
8497
     *
8498
     * @psalm-pure
8499
     *
8500
     * @return string[]
8501
     *                  <p>An array of strings.</p>
8502
     */
8503 16
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        // Without a pattern there is nothing to split on.
        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() reports failure (e.g. an invalid pattern) with
            // false; answer with an empty list like the preg_split() route
            // below instead of passing false on.
            if ($parts === false) {
                return [];
            }

            // A positive limit truncates the result to the first $limit parts.
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // Ask preg_split() for one part more than requested: the extra last
        // part collects the unsplit remainder and is dropped again below.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): this route quotes $pattern and therefore splits on it
        // literally, while the mb_split() route above treats it as a regular
        // expression. Confirm which behavior is intended.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
8553
8554
    /**
8555
     * Check if the string starts with the given substring.
8556
     *
8557
     * EXAMPLE: <code>
8558
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
8559
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
8560
     * </code>
8561
     *
8562
     * @param string $haystack <p>The string to search in.</p>
8563
     * @param string $needle   <p>The substring to search for.</p>
8564
     *
8565
     * @psalm-pure
8566
     *
8567
     * @return bool
8568
     */
8569 19
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // Every string starts with the empty string.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle cannot be found in an empty haystack.
        if ($haystack === '') {
            return false;
        }

        // Use the native function where available (PHP >= 8.0), otherwise
        // compare the leading bytes.
        return \PHP_VERSION_ID >= 80000
            ? \str_starts_with($haystack, $needle)
            : \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
8585
8586
    /**
8587
     * Returns true if the string begins with any of $substrings, false otherwise.
8588
     *
8589
     * - case-sensitive
8590
     *
8591
     * @param string $str        <p>The input string.</p>
8592
     * @param array  $substrings <p>Substrings to look for.</p>
8593
     *
8594
     * @psalm-pure
8595
     *
8596
     * @return bool
8597
     *              <p>Whether or not $str starts with $substring.</p>
8598
     */
8599 8
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // Without an input string or without candidates nothing can match.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // The first matching candidate decides.
        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
8617
8618
    /**
8619
     * Gets the substring after the first occurrence of a separator.
8620
     *
8621
     * @param string $str       <p>The input string.</p>
8622
     * @param string $separator <p>The string separator.</p>
8623
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8624
     *
8625
     * @psalm-pure
8626
     *
8627
     * @return string
8628
     */
8629 1
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Everything behind the first separator.
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class' own substr() here, like the sibling
        // "str_substr_*_separator" methods do, instead of handing the
        // caller's raw encoding name to \mb_substr().
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8659
8660
    /**
8661
     * Gets the substring after the last occurrence of a separator.
8662
     *
8663
     * @param string $str       <p>The input string.</p>
8664
     * @param string $separator <p>The string separator.</p>
8665
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8666
     *
8667
     * @psalm-pure
8668
     *
8669
     * @return string
8670
     */
8671 1
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Position of the last separator; false when it does not occur.
        $separator_at = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separator_at === false) {
            return '';
        }

        // Everything behind the last separator.
        if ($is_utf8) {
            return (string) \mb_substr($str, $separator_at + (int) \mb_strlen($separator));
        }

        return (string) self::substr(
            $str,
            $separator_at + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8704
8705
    /**
8706
     * Gets the substring before the first occurrence of a separator.
8707
     *
8708
     * @param string $str       <p>The input string.</p>
8709
     * @param string $separator <p>The string separator.</p>
8710
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8711
     *
8712
     * @psalm-pure
8713
     *
8714
     * @return string
8715
     */
8716 1
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Position of the first separator; false when it does not occur.
        $separator_at = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($separator_at === false) {
            return '';
        }

        // Everything in front of the first separator.
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $separator_at);
        }

        return (string) self::substr($str, 0, $separator_at, $encoding);
    }
8750
8751
    /**
8752
     * Gets the substring before the last occurrence of a separator.
8753
     *
8754
     * @param string $str       <p>The input string.</p>
8755
     * @param string $separator <p>The string separator.</p>
8756
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8757
     *
8758
     * @psalm-pure
8759
     *
8760
     * @return string
8761
     */
8762 1
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Position of the last separator; false when it does not occur.
        $separator_at = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separator_at === false) {
            return '';
        }

        // Everything in front of the last separator.
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $separator_at);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $separator_at, $encoding);
    }
8795
8796
    /**
8797
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
8798
     *
8799
     * @param string $str           <p>The input string.</p>
8800
     * @param string $needle        <p>The string to look for.</p>
8801
     * @param bool   $before_needle [optional] <p>Default: false</p>
8802
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
8803
     *
8804
     * @psalm-pure
8805
     *
8806
     * @return string
8807
     */
8808 2
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // With $before_needle the part in front of the first occurrence is
        // returned, otherwise the part starting at the needle.
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8842
8843
    /**
     * Returns the part of "$str" that starts at the last occurrence of "$needle",
     * or the part in front of that occurrence if "$before_needle" is true.
     *
     * An empty string is returned if "$str" or "$needle" is empty, or if the
     * "$needle" was not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "mb_strrchr()" defaults its third argument to false, so passing the flag
        // through covers both the "before" and the "after" case
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8889
8890
    /**
     * Wraps "$str" in "$substring": the substring is put in front of the
     * string and once more behind it.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
8905
8906
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that is neither whitespace nor one of
     * the chars from "$word_define_chars". Words listed in "$ignore" are compared
     * case-sensitively and returned unchanged.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           additional word separators (next to whitespace);
     *                                                           null or '' disables this.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_" functions are only used if no language / length special-casing was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare explicitly instead of relying on truthiness: the string "0" is
        // falsy in PHP and would otherwise be dropped as a separator char.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are returned as they are
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // language-aware path: lower-case the whole word, then upper-case its first char
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
9003
9004
    /**
     * Converts a string into an obfuscated string by replacing a share of its
     * characters with "$obfuscateChar".
     *
     * The characters are picked via "random_int()", so the result differs from
     * call to call. Characters listed in "$keepChars" count towards the share
     * when they are picked, but are never replaced.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('john.doe@example.com', 0.5, '*', ['@', '.']); // e.g. "j***.d**@e*a*p*e.c*m"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the characters to process, 0.0 - 1.0; values outside of this range
     *                                are clamped.</p>
     * @param string   $obfuscateChar
     * @param string[] $keepChars
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Pre-existing occurrences of the obfuscate-char are swapped for a placeholder
        // while we work and restored at the end. The placeholder must differ from the
        // obfuscate-char itself, otherwise such chars could never be counted and the
        // loop below would not terminate.
        $obfuscateCharHelper = $obfuscateChar === "\u{2603}" ? "\u{2604}" : "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // Clamp the target into [0, $charsMax]: every char can be counted only once,
        // so a target above the number of chars (percent > 1) would loop forever.
        $charsMaxChange = \min((float) $charsMax, \max(0.0, \round($charsMax * $percent)));
        $charsCounter = 0;
        $charKeyDone = [];

        // keep sweeping over the chars until enough of them were picked
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // roughly a coin flip per char and sweep
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // picked, but protected from being replaced
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
9070
9071
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are appended to the built-in list of "small words"
     * and inserted into the regular expressions as they are (unescaped).
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex alternation of all words that stay lower-case inside of a title;
        // merging an empty "$ignore" leaves the built-in list untouched
        $small_words_pattern = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostrophe_pattern = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-case the first char of the 2nd group, keep the 1st group in front of it.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_second_group = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        /**
         * Upper-case the first char of the 1st group.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_first_group = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        $str = \trim($str);

        // an input without any lower-case char (e.g. all caps) is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_pattern . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_pattern . ' ) ' . $apostrophe_pattern . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_pattern . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_pattern . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = static::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // leading and trailing underscores are preserved
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_pattern . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_second_group,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_pattern . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_first_group,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_pattern . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_first_group,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_pattern . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_second_group,
            $str
        );

        return $str;
    }
9262
9263
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one binary number, i.e. leading
     * zero bits are dropped; an input without any set bit (e.g. an empty string)
     * results in "0".
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        /** @noinspection OffsetOperationsInspection */
        $hex = (string) $value[1];

        // Convert digit by digit: "base_convert()" works on floats internally and
        // silently loses precision as soon as the input is longer than a few bytes.
        $bits = '';
        $hex_length = \strlen($hex);
        for ($i = 0; $i < $hex_length; ++$i) {
            $bits .= \str_pad(\decbin((int) \hexdec($hex[$i])), 4, '0', \STR_PAD_LEFT);
        }

        // same shape as a converted number: no leading zeros, but at least "0"
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9286
9287
    /**
     * Splits a string into lines.
     *
     * Every run of one or two "\r" / "\n" chars is treated as a single line
     * separator, hence e.g. "\n\n" does not produce an empty line in between.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "there is nothing to split"
        $empty_result = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $empty_result;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $empty_result;
        }

        // no filter requested -> hand the raw split result back
        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $lines;
        }

        return self::reduce_string_array(
            $lines,
            $remove_empty_values,
            $remove_short_values
        );
    }
9327
9328
    /**
     * Convert a string into an array of words.
     *
     * The string is split with the words as captured delimiters, so the result
     * contains the words as well as the text in between them.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "there is nothing to split"
        $empty_result = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $empty_result;
        }

        $char_list = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $empty_result;
        }

        // no filter requested -> hand the raw split result back
        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $parts;
        }

        $words = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every value is a string
        foreach ($words as $key => $word) {
            $words[$key] = (string) $word;
        }

        return $words;
    }
9379
9380
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9401
9402
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is as long as (or longer than) "$length", nothing of
     * "$str" is kept and only the substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough -> nothing to truncate, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the substring, but never go below zero: a negative
                // length makes "mb_substr()" cut from the END of the string, which
                // would return (nearly) the whole input instead of a truncated one.
                $length = \max(0, $length - (int) \mb_strlen($substring));
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 branch
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9462
9463
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * Only the space char (" ") is used as word boundary. If the input is empty,
     * or if there is no room left next to the substring, the substring alone is
     * returned.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            // short enough -> nothing to truncate
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // need to further trim the string so we can append the substring
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $cut - needed for PhpStan (stubs error) */
            $cut = \mb_substr($str, 0, $length);
            if ($cut === false) {
                return '';
            }

            // first space at or behind the last char we kept; if it does not sit
            // directly behind the cut, the last word was truncated
            $next_space = \mb_strpos($str, ' ', $length - 1);
            if ($next_space !== $length) {
                // find pos of the last occurrence of a space, get up to that
                $last_space = \mb_strrpos($cut, ' ', 0);

                $drop_broken_word = $last_space !== false
                                    || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);
                if ($drop_broken_word) {
                    $cut = (string) \mb_substr($cut, 0, (int) $last_space);
                }
            }

            return $cut . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // short enough -> nothing to truncate
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $cut = self::substr($str, 0, $length, $encoding);
        if ($cut === false) {
            return '';
        }

        // same word-boundary handling as in the UTF-8 branch
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space !== $length) {
            // find pos of the last occurrence of a space, get up to that
            $last_space = self::strrpos($cut, ' ', 0, $encoding);

            $drop_broken_word = $last_space !== false
                                || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);
            if ($drop_broken_word) {
                $cut = (string) self::substr($cut, 0, (int) $last_space, $encoding);
            }
        }

        return $cut . $substring;
    }
9569
9570
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
9587
9588
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized via "UTF8::str_camelize()" first, afterwards its
     * first char is upper-cased via "UTF8::ucfirst()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // only the encoding is handed to the camelize step; the remaining
        // options are applied by "ucfirst()"
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9615
9616
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $upper_first = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $upper_first;
    }
9647
9648
    /**
     * Count the words of a string, or return the words themselves.
     *
     * The string is split with "UTF8::str_to_words()"; the words are read from the odd
     * indexes (1, 3, 5, ...) of the returned list.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the list of words for format 1 / 2.</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            for ($idx = 1; $idx < $parts_count; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: words keyed by their character offset in the input
        if ($format === 2) {
            $words = [];
            // the first part precedes the first word, so its length is the first offset
            $position = (int) self::strlen($parts[0]);
            for ($idx = 1; $idx < $parts_count; $idx += 2) {
                $words[$position] = $parts[$idx];
                // advance past the word itself and the part that follows it
                $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
            }

            return $words;
        }

        // any other format: only the number of words
        return (int) (($parts_count - 1) / 2);
    }
9705
9706
    /**
     * Compare two strings while ignoring case.
     *
     * INFO: Both strings are case-folded via "UTF8::strtocasefold()" and then compared with "UTF8::strcmp()".
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_first, $folded_second);
    }
9748
9749
    /**
     * Deprecated alias of "UTF8::strstr()": every argument is forwarded unchanged.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Passed through to "UTF8::strstr()".</p>
     * @param string $encoding      [optional] <p>Passed through to "UTF8::strstr()".</p>
     * @param bool   $clean_utf8    [optional] <p>Passed through to "UTF8::strstr()".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9780
9781
    /**
     * Case-sensitive string comparison.
     *
     * INFO: Both strings are normalized to Unicode NFD before the byte-wise comparison. If a string
     *       cannot be normalized (e.g. it is not valid UTF-8), its original bytes are compared instead.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical byte sequences are equal, no normalization needed
        if ($str1 === $str2) {
            return 0;
        }

        $normalized_first = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized_second = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false on failure; passing that on to "\strcmp()" would
        // make two different invalid strings compare as equal (or raise a TypeError under
        // strict types), so fall back to the untouched input.
        return \strcmp(
            \is_string($normalized_first) ? $normalized_first : $str1,
            \is_string($normalized_second) ? $normalized_second : $str2
        );
    }
9807
9808
    /**
     * Find the length of the initial segment of a string that contains none of the given characters.
     *
     * @param string   $str       <p>The input string.</p>
     * @param string   $char_list <p>The characters that end the segment.</p>
     * @param int      $offset    [optional] <p>Start position; the string is cut with substr semantics first.</p>
     * @param int|null $length    [optional] <p>Length of the part of the string that is examined.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without any stop-characters the whole string is the segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // restrict the search to the requested sub-string
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $segment = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $segment = \mb_substr($str, $offset);
            } else {
                $segment = \mb_substr($str, $offset, $length);
            }

            if ($segment === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $segment;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first character out of $char_list
        $pattern = '/^(.*?)' . self::rxClass($char_list) . '/us';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            // no stop-character at all: the segment is the complete string
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9871
9872
    /**
     * Deprecated alias of "UTF8::stristr()": every argument is forwarded unchanged.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Passed through to "UTF8::stristr()".</p>
     * @param string $encoding      [optional] <p>Passed through to "UTF8::stristr()".</p>
     * @param bool   $clean_utf8    [optional] <p>Passed through to "UTF8::stristr()".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9903
9904
    /**
     * Build a UTF-8 string out of code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * INFO: every value is cast to int, written as a numeric HTML entity ("&#...;") and the
     *       resulting entity string is decoded via "UTF8::html_entity_decode()".
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single value is handled like a list with one element
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
9937
9938
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * INFO: The string is compared against every entry of self::$BOM; the key of an entry is used
     *       as the BOM byte sequence and the value as the number of leading bytes to compare.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value: nothing is written back, and a by-reference loop over the shared
        // static table would needlessly turn its elements into references.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9964
9965
    /**
     * Remove HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped; with <strong>null</strong>
     *                                    the native function is called without a second argument.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
10009
10010
    /**
     * Remove every whitespace character from a string. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without whitespace.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
10031
10032
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * INFO: "mb_stripos()" is used when mbstring is available; otherwise the method falls back to
     *       "grapheme_stripos()" (UTF-8 and non-negative offset only), to the native "stripos()"
     *       (ASCII-only input) and finally to case-folding both strings + "UTF8::strpos()".
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty haystack: only PHP >= 8 reports a match, and only for an empty needle
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        // empty needle: never found before PHP 8
        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
10121
10122
    /**
     * Returns all of haystack starting from and including the first case-insensitive occurrence
     * of needle to the end.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty haystack: only PHP >= 8 reports a match, and only for an empty needle
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // empty needle: the whole haystack on PHP >= 8, "not found" before
        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stristr($haystack, $needle, $before_needle, $encoding);
            }

            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_stristr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // lazily capture everything in front of the first case-insensitive match of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $prefix = $match[1];
        if ($before_needle) {
            return $prefix;
        }

        // everything from the end of the prefix onwards (needle included)
        return self::substr($haystack, (int) self::strlen($prefix, $encoding), null, $encoding);
    }
10227
10228
    /**
     * Get the string length, not the byte-length!
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * INFO: "mb_strlen()" is used when mbstring is available; otherwise the method tries, in this
     *       order: "strlen()" for CP850 / ASCII, "iconv_strlen()", "grapheme_strlen()" (UTF-8 only),
     *       "strlen()" for ASCII-only input and finally a regex based character count.
     *
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $has_mbstring = self::$SUPPORT['mbstring'] !== false;
        $has_iconv = self::$SUPPORT['iconv'] !== false;
        if ($encoding !== 'UTF-8' && !$has_mbstring && !$has_iconv) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // one regex match per character
        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // the string is not empty here, so zero matches means it could not be processed
        return $char_count === 0 ? false : $char_count;
    }
10358
10359
    /**
     * Get the length of a string in bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of bytes.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
10381
10382
    /**
     * Case-insensitive string comparison using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both strings are case-folded via
     *       "UTF8::strtocasefold()" and then compared with "UTF8::strnatcmp()".
     *
     * EXAMPLES: <code>
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
10413
10414
    /**
     * String comparison using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp(); both strings are passed through
     *       "UTF8::strtonatfold()" before the native "strnatcmp()" is called.
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical byte sequences are equal, no folding needed
        if ($str1 === $str2) {
            return 0;
        }

        $folded_first = (string) self::strtonatfold($str1);
        $folded_second = (string) self::strtonatfold($str2);

        return \strnatcmp($folded_first, $folded_second);
    }
10450
10451
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * INFO: both strings are case-folded via "UTF8::strtocasefold()" and then compared
     *       with "UTF8::strncmp()".
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_first, $folded_second, $len);
    }
10484
10485
    /**
10486
     * String comparison of the first n characters.
10487
     *
10488
     * EXAMPLE: <code>
10489
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
10490
     * </code>
10491
     *
10492
     * @see http://php.net/manual/en/function.strncmp.php
10493
     *
10494
     * @param string $str1     <p>The first string.</p>
10495
     * @param string $str2     <p>The second string.</p>
10496
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
10497
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10498
     *
10499
     * @psalm-pure
10500
     *
10501
     * @return int
10502
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10503
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10504
     *             <strong>0</strong> if they are equal
10505
     */
10506 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Canonicalize the charset name unless it already is one of the two
        // names that are recognised without normalization.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both strings down to their first $len characters.
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            // UTF-8 goes straight to mbstring.
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head1, $head2);
    }
10526
10527
    /**
10528
     * Search a string for any of a set of characters.
10529
     *
10530
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
10531
     *
10532
     * @see http://php.net/manual/en/function.strpbrk.php
10533
     *
10534
     * @param string $haystack  <p>The string where char_list is looked for.</p>
10535
     * @param string $char_list <p>This parameter is case-sensitive.</p>
10536
     *
10537
     * @psalm-pure
10538
     *
10539
     * @return false|string
10540
     *                      <p>The string starting from the character found, or false if it is not found.</p>
10541
     */
10542 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        // Nothing to search in, or nothing to search for.
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // Turn the character list into a regex character class and look for
        // its first hit ("u" = UTF-8 mode, "s" = dot matches newlines).
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $matches)) {
            return false;
        }

        // Locate the matched character by bytes and return the tail from there.
        $byte_offset = (int) \strpos($haystack, $matches[0]);

        return \substr($haystack, $byte_offset);
    }
10554
10555
    /**
10556
     * Find the position of the first occurrence of a substring in a string.
10557
     *
10558
     * INFO: use UTF8::strpos_in_byte() for the byte position
10559
     *
10560
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
10561
     *
10562
     * @see http://php.net/manual/en/function.mb-strpos.php
10563
     *
10564
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10565
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
10566
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
10567
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10568
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10569
     *
10570
     * @psalm-pure
10571
     *
10572
     * @return false|int
10573
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
10574
     *                   string.<br> If needle is not found it returns false.
10575
     */
10576 52
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Before PHP 8 an empty haystack never matches anything.
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle,
        // so a code point is converted into its character first.
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // Only reachable on PHP >= 8, where an empty needle is found
            // at position 0 even in an empty haystack.
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        // Before PHP 8 an empty needle is never found.
        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // $offset is guaranteed to be non-negative by the guard above
            $return_tmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack. Resolve it to
        // an absolute character index first, so that the position returned
        // below is relative to the start of the original haystack (as with
        // mb_strpos()) instead of relative to the sliced-off tail.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte-wise search inside the remaining part of the haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        // match directly at the start of the sliced haystack
        return $offset;
    }
10728
10729
    /**
10730
     * Find the position of the first occurrence of a substring in a string.
10731
     *
10732
     * @param string $haystack <p>
10733
     *                         The string being checked.
10734
     *                         </p>
10735
     * @param string $needle   <p>
10736
     *                         The string to search for in haystack.
10737
     *                         </p>
10738
     * @param int    $offset   [optional] <p>
10739
     *                         The search offset. If it is not specified, 0 is used.
10740
     *                         </p>
10741
     *
10742
     * @psalm-pure
10743
     *
10744
     * @return false|int
10745
     *                   <p>The numeric position of the first occurrence of needle in the
10746
     *                   haystack string. If needle is not found, it returns false.</p>
10747
     */
10748 2
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Empty input on either side is reported as "not found".
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // With "mbstring.func_overload" active, force an 8-bit charset so
        // the search still operates on bytes; otherwise use the native call.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
10761
10762
    /**
10763
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10764
     *
10765
     * @param string $haystack <p>
10766
     *                         The string being checked.
10767
     *                         </p>
10768
     * @param string $needle   <p>
10769
     *                         The string to search for in haystack.
10770
     *                         </p>
10771
     * @param int    $offset   [optional] <p>
10772
     *                         The search offset. If it is not specified, 0 is used.
10773
     *                         </p>
10774
     *
10775
     * @psalm-pure
10776
     *
10777
     * @return false|int
10778
     *                   <p>The numeric position of the first occurrence of needle in the
10779
     *                   haystack string. If needle is not found, it returns false.</p>
10780
     */
10781 2
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Empty input on either side is reported as "not found".
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // With "mbstring.func_overload" active, force an 8-bit charset so
        // the search still operates on bytes; otherwise use the native call.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \stripos($haystack, $needle, $offset);
    }
10794
10795
    /**
10796
     * Find the last occurrence of a character in a string within another.
10797
     *
10798
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10799
     *
10800
     * @see http://php.net/manual/en/function.mb-strrchr.php
10801
     *
10802
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10803
     * @param string $needle        <p>The string to find in haystack</p>
10804
     * @param bool   $before_needle [optional] <p>
10805
     *                              Determines which portion of haystack
10806
     *                              this function returns.
10807
     *                              If set to true, it returns all of haystack
10808
     *                              from the beginning to the last occurrence of needle.
10809
     *                              If set to false, it returns all of haystack
10810
     *                              from the last occurrence of needle to the end,
10811
     *                              </p>
10812
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10813
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10814
     *
10815
     * @psalm-pure
10816
     *
10817
     * @return false|string
10818
     *                      <p>The portion of haystack or false if needle is not found.</p>
10819
     */
10820 2
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty input on either side is reported as "not found".
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid bytes in front of the needle would shift the
            // positions reported by the mbstring / iconv functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only: the native function is sufficient,
        // but it has no "before needle" mode
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // remaining fallbacks (iconv, then vanilla php): both search for the
        // first character of the needle only and differ just in how the
        // last position is looked up
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        // Either everything in front of the hit, or the hit and what follows.
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10925
10926
    /**
10927
     * Reverses characters order in the string.
10928
     *
10929
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10930
     *
10931
     * @param string $str      <p>The input string.</p>
10932
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10933
     *
10934
     * @psalm-pure
10935
     *
10936
     * @return string
10937
     *                <p>The string with characters in the reverse sequence.</p>
10938
     */
10939 10
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // The input is passed through emoji_encode() before the reversal and
        // through emoji_decode() afterwards - presumably so that emoji built
        // from several code points survive the reversal (TODO confirm).
        $str = self::emoji_encode($str, true);

        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk from the last character down to the first
            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $cluster = \grapheme_substr($str, $index, 1);
                if ($cluster !== false) {
                    $result .= $cluster;
                }
            }
        } else {
            // no intl available: reverse code point by code point
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10983
10984
    /**
10985
     * Find the last occurrence of a character in a string within another, case-insensitive.
10986
     *
10987
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10988
     *
10989
     * @see http://php.net/manual/en/function.mb-strrichr.php
10990
     *
10991
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10992
     * @param string $needle        <p>The string to find in haystack.</p>
10993
     * @param bool   $before_needle [optional] <p>
10994
     *                              Determines which portion of haystack
10995
     *                              this function returns.
10996
     *                              If set to true, it returns all of haystack
10997
     *                              from the beginning to the last occurrence of needle.
10998
     *                              If set to false, it returns all of haystack
10999
     *                              from the last occurrence of needle to the end,
11000
     *                              </p>
11001
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
11002
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
11003
     *
11004
     * @psalm-pure
11005
     *
11006
     * @return false|string
11007
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
11008
     */
11009 3
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty input on either side is reported as "not found".
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid bytes in front of the needle would shift the
            // positions reported by the mbstring / iconv functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: only the first character of the needle
        // is searched for
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        // Either everything in front of the hit, or the hit and what follows.
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
11064
11065
    /**
11066
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
11067
     *
11068
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
11069
     *
11070
     * @param string     $haystack   <p>The string to look in.</p>
11071
     * @param int|string $needle     <p>The string to look for.</p>
11072
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
11073
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11074
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11075
     *
11076
     * @psalm-pure
11077
     *
11078
     * @return false|int
11079
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
11080
     *                   string.<br>If needle is not found, it returns false.</p>
11081
     */
11082 14
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Before PHP 8 an empty haystack never matches anything.
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle,
        // so a non-negative code point is converted into its character
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // Only reachable on PHP >= 8, where an empty needle is found
            // at position 0 even in an empty haystack.
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        // Before PHP 8 an empty needle is never found.
        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte) {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides and run the
        // case-sensitive search on the folded strings
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
11196
11197
    /**
11198
     * Finds position of last occurrence of a string within another, case-insensitive.
11199
     *
11200
     * @param string $haystack <p>
11201
     *                         The string from which to get the position of the last occurrence
11202
     *                         of needle.
11203
     *                         </p>
11204
     * @param string $needle   <p>
11205
     *                         The string to find in haystack.
11206
     *                         </p>
11207
     * @param int    $offset   [optional] <p>
11208
     *                         The position in haystack
11209
     *                         to start searching.
11210
     *                         </p>
11211
     *
11212
     * @psalm-pure
11213
     *
11214
     * @return false|int
11215
     *                   <p>The numeric position of the last occurrence of needle in the
11216
     *                   haystack string, or false if needle is not found.</p>
11217
     */
11218 2
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Empty input on either side is reported as "not found".
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // With "mbstring.func_overload" active, force an 8-bit charset so
        // the search still operates on bytes; otherwise use the native call.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
11231
11232
    /**
11233
     * Find the position of the last occurrence of a substring in a string.
11234
     *
11235
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
11236
     *
11237
     * @see http://php.net/manual/en/function.mb-strrpos.php
11238
     *
11239
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
11240
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
11241
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
11242
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
11243
     *                               the end of the string.
11244
     *                               </p>
11245
     * @param string     $encoding   [optional] <p>Set the charset.</p>
11246
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11247
     *
11248
     * @psalm-pure
11249
     *
11250
     * @return false|int
11251
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
11252
     *                   string.<br>If needle is not found, it returns false.</p>
11253
     */
11254 35
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Early exit for an empty haystack: PHP >= 8 reports position 0 for an
        // empty needle, older versions always report "not found". A non-empty
        // needle on PHP >= 8 is handled after the needle was normalized below.
        if ($haystack === '' && (\PHP_VERSION_ID < 80000 || $needle === '')) {
            return \PHP_VERSION_ID >= 80000 ? 0 : false;
        }

        // iconv and mbstring do not support integer $needle,
        // so a non-negative integer is converted via self::chr() first
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // same empty-haystack rule again, now with the string-casted needle
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        // before PHP 8 an empty needle is never found
        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // grapheme_strrpos() can't handle a negative offset or other encodings
        if ($offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the searched part: a positive offset drops
        // the leading characters (and is added back to the result), a negative
        // offset drops the trailing characters.
        $sliced = null;
        if ($offset > 0) {
            $sliced = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $sliced = self::substr($haystack, 0, $offset);
            $offset = 0;
        }

        if ($sliced !== null) {
            // a failed substr() is treated like an empty haystack
            $haystack = $sliced === false ? '' : (string) $sliced;
        }

        // byte position of the last match ...
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // ... converted into a character position by measuring the prefix
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
11391
11392
    /**
     * Find the position of the last occurrence of a substring in a string,
     * using the native (non multi-byte) string functions.
     *
     * @param string $haystack <p>The string that is searched for the last occurrence of needle.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] <p>Passed through to the underlying "strrpos" call: may be specified to
     *                         begin searching an arbitrary number of characters into the string. Negative values
     *                         will stop searching at an arbitrary point prior to the end of the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the haystack string.<br>
     *                   <strong>false</strong> if needle is not found or if haystack or needle is empty.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
11426
11427
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the examined part of the string.</p>
     * @param int|null $length   [optional] <p>Length of the examined part of the string.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Length of the matching initial segment; <strong>0</strong> if the (sliced) string or the
     *                   mask is empty or nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested slice before matching
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest leading run of mask characters
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11474
11475
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.<br>
     *                      With an empty needle the result depends on the PHP version: PHP >= 8 returns the
     *                      haystack, older versions return <strong>false</strong>.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty haystack: only PHP >= 8 "finds" an empty needle in it
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // invalid characters in front of the needle would lead to a wrong position
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // empty needle: PHP >= 8 returns the whole haystack, older versions fail
        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first needle
        $found = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $before = $found[1];

        return $before_needle
            ? $before
            : self::substr($haystack, (int) self::strlen($before));
    }
11608
11609
    /**
     * Finds the first occurrence of a string within another,
     * using the native (non multi-byte) string functions.
     *
     * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,<br>
     *                      or <strong>false</strong> if needle is not found or if haystack or needle is empty.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
11650
11651
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid UTF-8 sequences before any case conversion
            $str = self::clean($str);
        }

        // replace the special case-folding characters first ...
        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // ... then convert the case: directly via "mb_" for plain UTF-8 without a language
        if ($lang === null && $encoding === 'UTF-8') {
            return $lower ? \mb_strtolower($folded) : \mb_strtoupper($folded);
        }

        // otherwise the encoding- and language-aware helpers take over
        return $lower
            ? self::strtolower($folded, $encoding, false, $lang)
            : self::strtoupper($folded, $encoding, false, $lang);
    }
11707
11708
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid UTF-8 sequences before the conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language given: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // language specific rules need the intl transliterator
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of available transliterators
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11789
11790
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid UTF-8 sequences before the conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of available transliterators
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only this language is unknown:
                    // report it the same way as UTF8::strtolower() does
                    /**
                     * @psalm-suppress ImpureFunctionCall - is is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // the warning has to name this method (it used to say "strtolower")
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11871
11872
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.<br>
     *                              Without "to" an array is used as replace-pairs (search => replacement) and a
     *                              string is removed from "str".</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.<br>
     *                              A string is split into characters; if "from" and "to" differ in length, the
     *                              longer one is cut down to the length of the shorter one.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "from" and "to" can not be combined into replace-pairs
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // strings are translated character by character
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // bring both lists to the same length, the surplus of the longer one is ignored
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // The result goes into its own variable, so that the original lists
            // are still available for the exception message.
            // INFO: PHP >= 8 throws a "ValueError" instead of returning false
            $pairs = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            return \strtr($str, $pairs);
        }

        // from here on "to" is always an empty string:
        // a string "from" is simply removed from the input ...
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        // ... and an array "from" already contains the replace-pairs
        return \strtr($str, $from);
    }
11939
11940
    /**
     * Return the width of a string.
     *
     * INFO: this is the display width, not the length; in the vanilla-php fallback
     * every character from the wide ranges (e.g. CJK) counts as two columns.
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the regex below works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove all wide characters and remember how many there were
        $wide_count = 0;
        $narrow_only = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // wide characters take two columns, everything else one
        return (2 * $wide_count) + (int) self::strlen($narrow_only);
    }
11999
12000
    /**
12001
     * Get part of a string.
12002
     *
12003
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
12004
     *
12005
     * @see http://php.net/manual/en/function.mb-substr.php
12006
     *
12007
     * @param string   $str        <p>The string being checked.</p>
12008
     * @param int      $offset     <p>The first position used in str.</p>
12009
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
12010
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12011
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12012
     *
12013
     * @psalm-pure
12014
     *
12015
     * @return false|string
12016
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
12017
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
12018
     *                      characters long, <b>FALSE</b> will be returned.
12019
     */
12020 172
    public static function substr(
12021
        string $str,
12022
        int $offset = 0,
12023
        int $length = null,
12024
        string $encoding = 'UTF-8',
12025
        bool $clean_utf8 = false
12026
    ) {
12027
        // empty string
12028 172
        if ($str === '' || $length === 0) {
12029 8
            return '';
12030
        }
12031
12032 168
        if ($clean_utf8) {
12033
            // iconv and mbstring are not tolerant to invalid encoding
12034
            // further, their behaviour is inconsistent with that of PHP's substr
12035 2
            $str = self::clean($str);
12036
        }
12037
12038
        // whole string
12039 168
        if (!$offset && $length === null) {
12040 7
            return $str;
12041
        }
12042
12043 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
12044 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
12045
        }
12046
12047
        //
12048
        // fallback via mbstring
12049
        //
12050
12051 163
        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
12052 161
            if ($length === null) {
12053 64
                return \mb_substr($str, $offset);
12054
            }
12055
12056 102
            return \mb_substr($str, $offset, $length);
12057
        }
12058
12059
        //
12060
        // fallback for binary || ascii only
12061
        //
12062
12063
        if (
12064 4
            $encoding === 'CP850'
12065
            ||
12066 4
            $encoding === 'ASCII'
12067
        ) {
12068
            if ($length === null) {
12069
                return \substr($str, $offset);
12070
            }
12071
12072
            return \substr($str, $offset, $length);
12073
        }
12074
12075
        // otherwise we need the string-length
12076 4
        $str_length = 0;
12077 4
        if ($offset || $length === null) {
12078 4
            $str_length = self::strlen($str, $encoding);
12079
        }
12080
12081
        // e.g.: invalid chars + mbstring not installed
12082 4
        if ($str_length === false) {
12083
            return false;
12084
        }
12085
12086
        // empty string
12087 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
12088
            return '';
12089
        }
12090
12091
        // impossible
12092 4
        if ($offset && $offset > $str_length) {
12093
            return '';
12094
        }
12095
12096 4
        $length = $length ?? (int) $str_length;
12097
12098
        if (
12099 4
            $encoding !== 'UTF-8'
12100
            &&
12101 4
            self::$SUPPORT['mbstring'] === false
12102
        ) {
12103
            /**
12104
             * @psalm-suppress ImpureFunctionCall - it is only a warning
12105
             */
12106 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
12107
        }
12108
12109
        //
12110
        // fallback via intl
12111
        //
12112
12113
        if (
12114 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
12115
            &&
12116 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
12117
            &&
12118 4
            self::$SUPPORT['intl'] === true
12119
        ) {
12120
            $return_tmp = \grapheme_substr($str, $offset, $length);
12121
            if ($return_tmp !== false) {
12122
                return $return_tmp;
12123
            }
12124
        }
12125
12126
        //
12127
        // fallback via iconv
12128
        //
12129
12130
        if (
12131 4
            $length >= 0 // "iconv_substr()" can't handle negative length
12132
            &&
12133 4
            self::$SUPPORT['iconv'] === true
12134
        ) {
12135
            $return_tmp = \iconv_substr($str, $offset, $length);
12136
            if ($return_tmp !== false) {
12137
                return $return_tmp;
12138
            }
12139
        }
12140
12141
        //
12142
        // fallback for ascii only
12143
        //
12144
12145 4
        if (ASCII::is_ascii($str)) {
12146
            return \substr($str, $offset, $length);
12147
        }
12148
12149
        //
12150
        // fallback via vanilla php
12151
        //
12152
12153
        // split to array, and remove invalid characters
12154 4
        $array = self::str_split($str);
12155
12156
        // extract relevant part, and join to make string again
12157 4
        return \implode('', \array_slice($array, $offset, $length));
12158
    }
12159
12160
    /**
     * Compare a slice of $str1 with the equally long leading part of $str2.
     *
     * If an offset and/or a length is given, $str1 is reduced to that slice first and $str2 is
     * truncated to the character length of the slice. Without offset and length both strings are
     * compared unchanged.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison in characters. With null the
     *                                     slice of $str1 runs from $offset to the end of the string.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if ($offset !== 0 || $length !== null) {
            if ($encoding === 'UTF-8') {
                $str1 = $length === null
                    ? (string) \mb_substr($str1, $offset)
                    : (string) \mb_substr($str1, $offset, $length);

                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // Measure the slice in the encoding it was cut in; counting it as UTF-8
                // would truncate $str2 to the wrong number of characters.
                $slice_length = (int) self::strlen($str1, $encoding);
                $str2 = (string) self::substr($str2, 0, $slice_length, $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12221
12222
    /**
     * Count how often $needle occurs in $haystack, optionally limited to a character window.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The character offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The number of characters after the offset to search in. Null searches up
     *                             to the end of the string, 0 always yields 0. The window is cut with
     *                             "mb_substr()", so a negative value is interpreted the way "mb_substr()"
     *                             interprets a negative length.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if $needle is empty or the length of
     *                   $haystack could not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in, or an empty search window
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cut the search window whenever one was requested. A negative $length must not be
        // dropped just because $offset is 0, it is honoured for every other offset as well.
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback without mbstring: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12319
12320
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * Without "mbstring.func_overload" the call is forwarded to the native "substr_count()".
     * With the overload active, the requested window is cut with "substr()" and counted with
     * "mb_substr_count()" in an 8-bit charset.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string. 0 is returned if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            if ($offset || $length !== null) {
                if ($length === null) {
                    $length_tmp = self::strlen($haystack);
                    if ($length_tmp === false) {
                        return false;
                    }
                    $length = (int) $length_tmp;
                }

                if (
                    $length !== 0
                    &&
                    $offset !== 0
                    &&
                    ($length + $offset) <= 0
                    &&
                    // output from "substr_count()" have changed in PHP 7.1
                    // (PHP_VERSION_ID is major * 10000 + minor * 100 + release, so 7.1.0 is 70100)
                    \PHP_VERSION_ID < 70100
                ) {
                    return false;
                }

                /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
                $haystack_tmp = \substr($haystack, $offset, $length);
                if ($haystack_tmp === false) {
                    $haystack_tmp = '';
                }
                $haystack = (string) $haystack_tmp;
            }

            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
12401
12402
    /**
     * Count the occurrences of $substring in $str.
     *
     * The search is case-sensitive unless $case_sensitive is set to false. For a
     * case-insensitive search both strings are converted to a common case before counting.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences, 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        if (!$case_sensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
12449
12450
    /**
     * Strip $needle from the start of $haystack, ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12482
12483
    /**
     * Get part of a string, with offset and length counted in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string, null means
     *                         "up to the end of the string".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      <i>str</i> or a <i>length</i> of 0. The result for an <i>offset</i> behind
     *                      the end of <i>str</i> is whatever the native "substr()" returns
     *                      (<b>FALSE</b> before PHP 8.0, an empty string since then).</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // Leave the length out instead of passing a 32-bit maximum, so the
        // result is never capped for very large strings.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
12516
12517
    /**
     * Strip $needle from the end of $haystack, ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it
     *                does not end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
12549
12550
    /**
     * Strip $needle from the start of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12582
12583
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings. If $str is a
     *                                     string and an array is given, only its first element is used.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and $str is a string, the replacing
     *                                     ends at the end of string. If it is not given and $str is an array, a
     *                                     length of zero is used for every element. If length is zero then this
     *                                     function will have the effect of inserting replacement into string at
     *                                     the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries fall back to 0)
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (non-integer entries fall back to the number of input strings)
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, one per input string; the encoding has to be forwarded
            // explicitly, otherwise every element would be processed as UTF-8
            return \array_map(
                static function ($str_item, $replacement_item, $offset_item, $length_item) use ($encoding) {
                    return self::substr_replace($str_item, $replacement_item, $offset_item, $length_item, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative length into a positive one, default to "up to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12738
12739
    /**
     * Strip $needle from the end of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it
     *                does not end with the needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // the suffix check is done on the raw bytes
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
12788
12789
    /**
     * Returns a case swapped version of the string.
     *
     * For UTF-8 input every character is handled on its own: a character that changes when
     * lower-cased is replaced by its lower-case form, every other character by its upper-case
     * form. This stays correct when the two forms differ in byte length.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            // NOTE(review): the byte-wise XOR is only reliable while the lower- and upper-case
            // forms have the same byte length - kept unchanged for other encodings.
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // the string is not valid UTF-8 and cannot be split into characters,
            // so fall back to the byte-wise variant
            return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $lower = \mb_strtolower($char, 'UTF-8');
            $swapped .= $lower === $char ? \mb_strtoupper($char, 'UTF-8') : $lower;
        }

        return $swapped;
    }
12821
12822
    /**
     * Checks whether a polyfill stands in for the "mbstring" or the "iconv" extension.
     *
     * A polyfill is assumed if the extension itself is not loaded, but one of its
     * functions ("mb_strlen" / "iconv") exists anyway.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12849
12850
    /**
     * Replace every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>The number of spaces that replace one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs expanded.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12870
12871
    /**
12872
     * Converts the first character of each word in the string to uppercase
12873
     * and all other chars to lowercase.
12874
     *
12875
     * @param string      $str                           <p>The input string.</p>
12876
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12877
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12878
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12879
     *                                                   tr</p>
12880
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12881
     *                                                   -> ß</p>
12882
     *
12883
     * @psalm-pure
12884
     *
12885
     * @return string
12886
     *                <p>A string with all characters of $str being title-cased.</p>
12887
     */
12888 5
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // sanitize the input via self::clean() before any case mapping is applied
            $str = self::clean($str);
        }

        // Language specific rules or length preservation cannot be handled by
        // "mb_convert_case()", so those requests are delegated to str_titleize().
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        // plain UTF-8: no explicit encoding is passed, as in the other UTF-8 fast paths
        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
12925
12926
    /**
12927
     * alias for "UTF8::to_ascii()"
12928
     *
12929
     * @param string $str
12930
     * @param string $subst_chr
12931
     * @param bool   $strict
12932
     *
12933
     * @psalm-pure
12934
     *
12935
     * @return string
12936
     *
12937
     * @see        UTF8::to_ascii()
12938
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
12939
     */
12940 7
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        // deprecated camelCase alias: all arguments are forwarded unchanged
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12947
12948
    /**
12949
     * alias for "UTF8::to_iso8859()"
12950
     *
12951
     * @param string|string[] $str
12952
     *
12953
     * @psalm-pure
12954
     *
12955
     * @return string|string[]
12956
     *
12957
     * @see        UTF8::to_iso8859()
12958
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12959
     */
12960 2
    public static function toIso8859($str)
    {
        // deprecated camelCase alias: the snake_case method does the actual work
        $converted = self::to_iso8859($str);

        return $converted;
    }
12964
12965
    /**
12966
     * alias for "UTF8::to_latin1()"
12967
     *
12968
     * @param string|string[] $str
12969
     *
12970
     * @psalm-pure
12971
     *
12972
     * @return string|string[]
12973
     *
12974
     * @see        UTF8::to_iso8859()
12975
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12976
     */
12977 2
    public static function toLatin1($str)
    {
        // deprecated alias: forwards directly to the ISO-8859 conversion
        $converted = self::to_iso8859($str);

        return $converted;
    }
12981
12982
    /**
12983
     * alias for "UTF8::to_utf8()"
12984
     *
12985
     * @param string|string[] $str
12986
     *
12987
     * @psalm-pure
12988
     *
12989
     * @return string|string[]
12990
     *
12991
     * @see        UTF8::to_utf8()
12992
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
12993
     */
12994 2
    public static function toUTF8($str)
    {
        // deprecated camelCase alias: the optional second argument of to_utf8() keeps its default
        $converted = self::to_utf8($str);

        return $converted;
    }
12998
12999
    /**
13000
     * Convert a string into ASCII.
13001
     *
13002
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
13003
     *
13004
     * @param string $str     <p>The input string.</p>
13005
     * @param string $unknown [optional] <p>Character to use if a character is unknown. (default is ?)</p>
13006
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
13007
     *                        performance</p>
13008
     *
13009
     * @psalm-pure
13010
     *
13011
     * @return string
13012
     */
13013 37
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // the transliteration itself is implemented by the ASCII helper class
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
13020
13021
    /**
13022
     * @param bool|int|string $str
13023
     *
13024
     * @phpstan-param bool|int|numeric-string $str
13025
     *
13026
     * @psalm-pure
13027
     *
13028
     * @return bool
13029
     */
13030 19
    public static function to_boolean($str): bool
    {
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $truthy_literals = ['true', '1', 'on', 'yes'];
        $falsy_literals = ['false', '0', 'off', 'no'];

        // All known literals are lowercase, so a single lookup on the lowercased
        // input covers both the exact and the case-insensitive match.
        $normalized = \strtolower($value);

        if (\in_array($normalized, $truthy_literals, true)) {
            return true;
        }

        if (\in_array($normalized, $falsy_literals, true)) {
            return false;
        }

        // any other numeric input is true only when it is strictly positive
        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        // everything else falls back to PHP's truthiness of the trimmed string
        return (bool) \trim($value);
    }
13066
13067
    /**
13068
     * Convert given string to safe filename (and keep string case).
13069
     *
13070
     * @param string $str
13071
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
13072
     *                                  simply replaced with hyphen.
13073
     * @param string $fallback_char
13074
     *
13075
     * @psalm-pure
13076
     *
13077
     * @return string
13078
     */
13079 1
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // the filename sanitizing is implemented by the ASCII helper class
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
13090
13091
    /**
13092
     * Convert a string into "ISO-8859"-encoding (Latin-1).
13093
     *
13094
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
13095
     *
13096
     * @param string|string[] $str
13097
     *
13098
     * @psalm-pure
13099
     *
13100
     * @return string|string[]
13101
     */
13102 8
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            // Convert every element (recursively) and write it back by key.
            // Iterating by value avoids the dangling reference that a
            // "foreach (... as &$v)" loop leaves behind on the last element.
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            // nothing to decode
            return '';
        }

        return self::utf8_decode($str);
    }
13119
13120
    /**
13121
     * alias for "UTF8::to_iso8859()"
13122
     *
13123
     * @param string|string[] $str
13124
     *
13125
     * @psalm-pure
13126
     *
13127
     * @return string|string[]
13128
     *
13129
     * @see        UTF8::to_iso8859()
13130
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
13131
     */
13132 2
    public static function to_latin1($str)
    {
        // deprecated alias: Latin-1 is handled by the ISO-8859 conversion
        $converted = self::to_iso8859($str);

        return $converted;
    }
13136
13137
    /**
13138
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13139
     *
13140
     * <ul>
13141
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
13142
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13143
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
13144
     * case.</li>
13145
     * </ul>
13146
     *
13147
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
13148
     *
13149
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
13150
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13151
     *
13152
     * @psalm-pure
13153
     *
13154
     * @return string|string[]
13155
     *                         <p>The UTF-8 encoded string</p>
13156
     *
13157
     * @template TToUtf8
13158
     * @phpstan-param TToUtf8 $str
13159
     * @phpstan-return TToUtf8
13160
     *
13161
     * @noinspection SuspiciousBinaryOperationInspection
13162
     */
13163 44
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str)) {
            // Convert each element and write it back by key. Iterating by value
            // avoids the dangling reference that a "foreach (... as &$v)" loop
            // leaves behind on the last element of the returned array.
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
            }

            return $str;
        }

        /** @phpstan-var TToUtf8 $str */
        $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $str;
    }
13178
13179
    /**
13180
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13181
     *
13182
     * <ul>
13183
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
13184
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13185
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
13186
     * case.</li>
13187
     * </ul>
13188
     *
13189
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
13190
     *
13191
     * @param string $str                        <p>Any string.</p>
13192
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13193
     *
13194
     * @psalm-pure
13195
     *
13196
     * @return string
13197
     *                <p>The UTF-8 encoded string</p>
13198
     *
13199
     * @noinspection SuspiciousBinaryOperationInspection
13200
     */
13201 44
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';
        $pos = 0;

        // Pass 1: walk the raw bytes, keep what already looks like UTF-8 and
        // hand every other non-ASCII byte to the convert helper.
        while ($pos < $length) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                // Below 0xC0 a byte is either copied as-is or, if it has the
                // continuation-byte shape 10xxxxxx, needs conversion.
                $result .= ($lead & "\xC0") === "\x80"
                    ? self::to_utf8_convert_helper($lead)
                    : $lead;
                ++$pos;

                continue;
            }

            // Number of continuation bytes announced by the lead byte;
            // 0 means the byte (above 0xF7) does not look like a UTF-8 lead at all.
            if ($lead <= "\xDF") {
                $expected_trailing = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $expected_trailing = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $expected_trailing = 3; // looks like 4 bytes UTF8
            } else {
                $expected_trailing = 0;
            }

            $looks_like_utf8 = $expected_trailing > 0;
            for ($offset = 1; $looks_like_utf8 && $offset <= $expected_trailing; ++$offset) {
                // bytes past the end of the input count as "\x00", which never validates
                $next = $pos + $offset >= $length ? "\x00" : $str[$pos + $offset];
                $looks_like_utf8 = $next >= "\x80" && $next <= "\xBF";
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: keep the whole sequence untouched
                $result .= \substr($str, $pos, $expected_trailing + 1);
                $pos += $expected_trailing + 1;
            } else {
                // not valid UTF8 - convert the lead byte only, the next byte is examined on its own
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        /**
         * Turns one matched escape sequence (single "\uXXXX" or a surrogate pair) into a string.
         *
         * @param array $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $unescape = static function (array $matches): string {
            if (isset($matches[3])) {
                // single escape sequence
                $code_point = (int) \hexdec($matches[3]);
            } else {
                // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                $high_surrogate = (int) \hexdec($matches[1]);
                $low_surrogate = (int) \hexdec($matches[2]);
                $code_point = 0x10000 + (($high_surrogate - 0xD800) << 10) + ($low_surrogate - 0xDC00);
            }

            // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
            //
            // php_utf32_utf8(unsigned char *buf, unsigned k)

            if ($code_point < 0x80) {
                return (string) self::chr($code_point);
            }

            if ($code_point < 0xA0) {
                /** @noinspection UnnecessaryCastingInspection */
                return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
            }

            return self::decimal_to_chr($code_point);
        };

        // Pass 2: decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            $unescape,
            $result
        );

        if ($result === null) {
            // the regex engine reported an error
            return '';
        }

        // Pass 3 (optional): decode html-entities
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
13312
13313
    /**
13314
     * Returns the given string as an integer, or null if the string isn't numeric.
13315
     *
13316
     * @param string $str
13317
     *
13318
     * @psalm-pure
13319
     *
13320
     * @return int|null
13321
     *                  <p>null if the string isn't numeric</p>
13322
     */
13323 1
    public static function to_int(string $str)
    {
        // only numeric strings are cast, everything else is reported as null
        return \is_numeric($str) ? (int) $str : null;
    }
13331
13332
    /**
13333
     * Returns the given input as string, or null if the input isn't int|float|string
13334
     * and do not implement the "__toString()" method.
13335
     *
13336
     * @param float|int|object|string|null $input
13337
     *
13338
     * @psalm-pure
13339
     *
13340
     * @return string|null
13341
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
13342
     */
13343 1
    public static function to_string($input)
    {
        // scalars with a natural string representation (booleans are deliberately not included)
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects qualify only when they can be cast via "__toString()"
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource and non-stringable objects
        return null;
    }
13377
13378
    /**
13379
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
13380
     *
13381
     * INFO: This is slower than "trim()"
13382
     *
13383
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
13384
     * but the check for ASCII (7-Bit) costs more time than we can save here.
13385
     *
13386
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
13387
     *
13388
     * @param string      $str   <p>The string to be trimmed</p>
13389
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
13390
     *
13391
     * @psalm-pure
13392
     *
13393
     * @return string
13394
     *                <p>The trimmed string.</p>
13395
     */
13396 57
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // "mb_ereg_replace()" takes the pattern without delimiters,
                // so no delimiter is passed to "preg_quote()" here.
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$chars}" instead of "${chars}": the latter interpolation
                // syntax is deprecated as of PHP 8.2.
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: same pattern, applied via self::regex_replace()
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
13424
13425
    /**
13426
     * Makes string's first char uppercase.
13427
     *
13428
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
13429
     *
13430
     * @param string      $str                           <p>The input string.</p>
13431
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
13432
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
13433
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
13434
     *                                                   tr</p>
13435
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
13436
     *                                                   -> ß</p>
13437
     *
13438
     * @psalm-pure
13439
     *
13440
     * @return string
13441
     *                <p>The resulting string with its first char in uppercase.</p>
13442
     */
13443 69
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // sanitize the input via self::clean() before it is split
            $str = self::clean($str);
        }

        // Without language rules and without length preservation the plain
        // "mb_strtoupper()" is sufficient; otherwise self::strtoupper() is used.
        $plain_case_mapping = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            $first_char_upper = $plain_case_mapping
                ? \mb_strtoupper($first_char)
                : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

            return $first_char_upper . $remainder;
        }

        // every other encoding name is normalized first and then passed on explicitly
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($plain_case_mapping) {
            $first_char_upper = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first_char_upper = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first_char_upper . $remainder;
    }
13501
13502
    /**
13503
     * alias for "UTF8::ucfirst()"
13504
     *
13505
     * @param string $str
13506
     * @param string $encoding
13507
     * @param bool   $clean_utf8
13508
     *
13509
     * @psalm-pure
13510
     *
13511
     * @return string
13512
     *
13513
     * @see        UTF8::ucfirst()
13514
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
13515
     */
13516 1
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        // deprecated alias: the remaining ucfirst() options keep their defaults
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
13523
13524
    /**
13525
     * Uppercase for all words in the string.
13526
     *
13527
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
13528
     *
13529
     * @param string   $str        <p>The input string.</p>
13530
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
13531
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
13532
     *                             word.</p>
13533
     * @param string   $encoding   [optional] <p>Set the charset.</p>
13534
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
13535
     *
13536
     * @psalm-pure
13537
     *
13538
     * @return string
13539
     */
13540 8
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Compare against '' explicitly: a loose "!$str" check also treats
        // the string "0" as empty and would return '' for it.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // sanitize the input via self::clean() before it is split into words
            $str = self::clean($str);
        }

        // the native function is only an option when neither extra word chars nor exceptions are given
        $use_php_default_functions = !(bool) ($char_list . \implode('', $exceptions));

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // iterate by value: the elements are only read, so no reference is needed
        foreach ($words as $word) {
            // skip empty chunks only; a chunk like "0" is real content and must be kept
            if ((string) $word === '') {
                continue;
            }

            if (
                $use_exceptions
                &&
                \in_array($word, $exceptions, true)
            ) {
                // excluded words are appended untouched
                $words_str .= $word;
            } else {
                $words_str .= self::ucfirst($word, $encoding);
            }
        }

        return $words_str;
    }
13592
13593
    /**
13594
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
13595
     *
13596
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
13597
     *
13598
     * e.g:
13599
     * 'test+test'                     => 'test test'
13600
     * 'D&#252;sseldorf'               => 'Düsseldorf'
13601
     * 'D%FCsseldorf'                  => 'Düsseldorf'
13602
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
13603
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
13604
     * 'Düsseldorf'                   => 'Düsseldorf'
13605
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
13606
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
13607
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
13608
     *
13609
     * @param string $str          <p>The input string.</p>
13610
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
13611
     *
13612
     * @psalm-pure
13613
     *
13614
     * @return string
13615
     */
13616 4
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Without any of these markers there is nothing to url-/entity-/unicode-decode.
        $contains_encoded_data = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $contains_encoded_data = true;

                break;
            }
        }

        if (!$contains_encoded_data) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // One decode pass is always executed; with $multi_decode the passes
        // are repeated until the string no longer changes.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $previous !== $str);

        return $str;
    }
13668
13669
    /**
13670
     * Return an array with "urlencoded"-win1252 -> UTF-8
13671
     *
13672
     * @psalm-pure
13673
     *
13674
     * @return string[]
13675
     *
13676
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
13677
     */
13678 2
    public static function urldecode_fix_win1252_chars(): array
13679
    {
13680
        return [
13681 2
            '%20' => ' ',
13682
            '%21' => '!',
13683
            '%22' => '"',
13684
            '%23' => '#',
13685
            '%24' => '$',
13686
            '%25' => '%',
13687
            '%26' => '&',
13688
            '%27' => "'",
13689
            '%28' => '(',
13690
            '%29' => ')',
13691
            '%2A' => '*',
13692
            '%2B' => '+',
13693
            '%2C' => ',',
13694
            '%2D' => '-',
13695
            '%2E' => '.',
13696
            '%2F' => '/',
13697
            '%30' => '0',
13698
            '%31' => '1',
13699
            '%32' => '2',
13700
            '%33' => '3',
13701
            '%34' => '4',
13702
            '%35' => '5',
13703
            '%36' => '6',
13704
            '%37' => '7',
13705
            '%38' => '8',
13706
            '%39' => '9',
13707
            '%3A' => ':',
13708
            '%3B' => ';',
13709
            '%3C' => '<',
13710
            '%3D' => '=',
13711
            '%3E' => '>',
13712
            '%3F' => '?',
13713
            '%40' => '@',
13714
            '%41' => 'A',
13715
            '%42' => 'B',
13716
            '%43' => 'C',
13717
            '%44' => 'D',
13718
            '%45' => 'E',
13719
            '%46' => 'F',
13720
            '%47' => 'G',
13721
            '%48' => 'H',
13722
            '%49' => 'I',
13723
            '%4A' => 'J',
13724
            '%4B' => 'K',
13725
            '%4C' => 'L',
13726
            '%4D' => 'M',
13727
            '%4E' => 'N',
13728
            '%4F' => 'O',
13729
            '%50' => 'P',
13730
            '%51' => 'Q',
13731
            '%52' => 'R',
13732
            '%53' => 'S',
13733
            '%54' => 'T',
13734
            '%55' => 'U',
13735
            '%56' => 'V',
13736
            '%57' => 'W',
13737
            '%58' => 'X',
13738
            '%59' => 'Y',
13739
            '%5A' => 'Z',
13740
            '%5B' => '[',
13741
            '%5C' => '\\',
13742
            '%5D' => ']',
13743
            '%5E' => '^',
13744
            '%5F' => '_',
13745
            '%60' => '`',
13746
            '%61' => 'a',
13747
            '%62' => 'b',
13748
            '%63' => 'c',
13749
            '%64' => 'd',
13750
            '%65' => 'e',
13751
            '%66' => 'f',
13752
            '%67' => 'g',
13753
            '%68' => 'h',
13754
            '%69' => 'i',
13755
            '%6A' => 'j',
13756
            '%6B' => 'k',
13757
            '%6C' => 'l',
13758
            '%6D' => 'm',
13759
            '%6E' => 'n',
13760
            '%6F' => 'o',
13761
            '%70' => 'p',
13762
            '%71' => 'q',
13763
            '%72' => 'r',
13764
            '%73' => 's',
13765
            '%74' => 't',
13766
            '%75' => 'u',
13767
            '%76' => 'v',
13768
            '%77' => 'w',
13769
            '%78' => 'x',
13770
            '%79' => 'y',
13771
            '%7A' => 'z',
13772
            '%7B' => '{',
13773
            '%7C' => '|',
13774
            '%7D' => '}',
13775
            '%7E' => '~',
13776
            '%7F' => '',
13777
            '%80' => '`',
13778
            '%81' => '',
13779
            '%82' => '‚',
13780
            '%83' => 'ƒ',
13781
            '%84' => '„',
13782
            '%85' => '…',
13783
            '%86' => '†',
13784
            '%87' => '‡',
13785
            '%88' => 'ˆ',
13786
            '%89' => '‰',
13787
            '%8A' => 'Š',
13788
            '%8B' => '‹',
13789
            '%8C' => 'Œ',
13790
            '%8D' => '',
13791
            '%8E' => 'Ž',
13792
            '%8F' => '',
13793
            '%90' => '',
13794
            '%91' => '‘',
13795
            '%92' => '’',
13796
            '%93' => '“',
13797
            '%94' => '”',
13798
            '%95' => '•',
13799
            '%96' => '–',
13800
            '%97' => '—',
13801
            '%98' => '˜',
13802
            '%99' => '™',
13803
            '%9A' => 'š',
13804
            '%9B' => '›',
13805
            '%9C' => 'œ',
13806
            '%9D' => '',
13807
            '%9E' => 'ž',
13808
            '%9F' => 'Ÿ',
13809
            '%A0' => '',
13810
            '%A1' => '¡',
13811
            '%A2' => '¢',
13812
            '%A3' => '£',
13813
            '%A4' => '¤',
13814
            '%A5' => '¥',
13815
            '%A6' => '¦',
13816
            '%A7' => '§',
13817
            '%A8' => '¨',
13818
            '%A9' => '©',
13819
            '%AA' => 'ª',
13820
            '%AB' => '«',
13821
            '%AC' => '¬',
13822
            '%AD' => '',
13823
            '%AE' => '®',
13824
            '%AF' => '¯',
13825
            '%B0' => '°',
13826
            '%B1' => '±',
13827
            '%B2' => '²',
13828
            '%B3' => '³',
13829
            '%B4' => '´',
13830
            '%B5' => 'µ',
13831
            '%B6' => '¶',
13832
            '%B7' => '·',
13833
            '%B8' => '¸',
13834
            '%B9' => '¹',
13835
            '%BA' => 'º',
13836
            '%BB' => '»',
13837
            '%BC' => '¼',
13838
            '%BD' => '½',
13839
            '%BE' => '¾',
13840
            '%BF' => '¿',
13841
            '%C0' => 'À',
13842
            '%C1' => 'Á',
13843
            '%C2' => 'Â',
13844
            '%C3' => 'Ã',
13845
            '%C4' => 'Ä',
13846
            '%C5' => 'Å',
13847
            '%C6' => 'Æ',
13848
            '%C7' => 'Ç',
13849
            '%C8' => 'È',
13850
            '%C9' => 'É',
13851
            '%CA' => 'Ê',
13852
            '%CB' => 'Ë',
13853
            '%CC' => 'Ì',
13854
            '%CD' => 'Í',
13855
            '%CE' => 'Î',
13856
            '%CF' => 'Ï',
13857
            '%D0' => 'Ð',
13858
            '%D1' => 'Ñ',
13859
            '%D2' => 'Ò',
13860
            '%D3' => 'Ó',
13861
            '%D4' => 'Ô',
13862
            '%D5' => 'Õ',
13863
            '%D6' => 'Ö',
13864
            '%D7' => '×',
13865
            '%D8' => 'Ø',
13866
            '%D9' => 'Ù',
13867
            '%DA' => 'Ú',
13868
            '%DB' => 'Û',
13869
            '%DC' => 'Ü',
13870
            '%DD' => 'Ý',
13871
            '%DE' => 'Þ',
13872
            '%DF' => 'ß',
13873
            '%E0' => 'à',
13874
            '%E1' => 'á',
13875
            '%E2' => 'â',
13876
            '%E3' => 'ã',
13877
            '%E4' => 'ä',
13878
            '%E5' => 'å',
13879
            '%E6' => 'æ',
13880
            '%E7' => 'ç',
13881
            '%E8' => 'è',
13882
            '%E9' => 'é',
13883
            '%EA' => 'ê',
13884
            '%EB' => 'ë',
13885
            '%EC' => 'ì',
13886
            '%ED' => 'í',
13887
            '%EE' => 'î',
13888
            '%EF' => 'ï',
13889
            '%F0' => 'ð',
13890
            '%F1' => 'ñ',
13891
            '%F2' => 'ò',
13892
            '%F3' => 'ó',
13893
            '%F4' => 'ô',
13894
            '%F5' => 'õ',
13895
            '%F6' => 'ö',
13896
            '%F7' => '÷',
13897
            '%F8' => 'ø',
13898
            '%F9' => 'ù',
13899
            '%FA' => 'ú',
13900
            '%FB' => 'û',
13901
            '%FC' => 'ü',
13902
            '%FD' => 'ý',
13903
            '%FE' => 'þ',
13904
            '%FF' => 'ÿ',
13905
        ];
13906
    }
13907
13908
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences whose code point is below 256 are mapped to a single byte; every other
     * multi-byte sequence (including a two-byte lead that is cut off at the end of the input)
     * is replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true, the untouched input is returned whenever the decoded result
     *                                is not shorter than the input (both measured with self::strlen()).
     *                                </p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // $i is the read offset, $j the write offset: the result is assembled in place at the
        // start of $str. $j never overtakes $i, so no unread byte is overwritten.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte selects the sequence length
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if (!isset($str[$i + 1])) {
                        // truncated two-byte sequence at the very end of the input:
                        // do not read past the string, emit the placeholder instead
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // four-byte sequence: skip one extra byte, then share the three-byte handling
                    ++$i;

                // no break

                case "\xE0":
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // single byte (ASCII or a stray continuation byte): copy as-is
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13984
13985
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The encoded string, or an empty string if the underlying encoder returns false.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Encode exactly once: running the result through the encoder a second time would
        // double-encode every non-ASCII byte.
        /** @var false|string $str - the polyfill maybe return false */
        $str = \utf8_encode(/** @scrutinizer ignore-type */ $str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($str === false) {
            return '';
        }

        return $str;
    }
14013
14014
    /**
     * Deprecated alias: forwards the string unchanged to "UTF8::fix_simple_utf8()" and returns its result.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        return self::fix_simple_utf8($str);
    }
14029
14030
    /**
     * Returns the class-level table of UTF-8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>
     *                  All known whitespace characters as values, keyed by the type of whitespace
     *                  as defined in the URL above.
     *                  </p>
     */
    public static function whitespace_table(): array
    {
        return self::$WHITESPACE_TABLE;
    }
14045
14046
    /**
     * Cuts a string down to at most "$limit" words.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The maximum number of words to keep.</p>
     * @param string $str_add_on <p>Appended to the result when words were stripped.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                An empty string for empty input or a limit below 1, the unchanged input when
     *                nothing had to be removed, otherwise the shortened text plus "$str_add_on".
     *                </p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace followed by 1..$limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        $head = $found[0] ?? null;

        // no match at all, or the match already covers the whole string: nothing to cut
        if ($head === null || \mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
14080
14081
    /**
     * Wraps a string to a given number of characters.
     *
     * The text is first turned into a single-byte "mask" (one "?" per character, spaces kept,
     * existing breaks marked with "#"). The native wordwrap() decides the break positions on
     * that mask, and the positions are then mapped back onto the real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column; empty if "$str" or "$break" is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real characters (and the original breaks), $mask their placeholders
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                // every segment but the first was preceded by a break in the input
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_pos = -1;
        $break_pos = -1;
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy everything up to the next break marker
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // the marker replaced an original break or a space: drop that character
            if ($chars[$char_index] === $break || $chars[$char_index] === ' ') {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed belongs to the last line
        return $wrapped . \implode('', $chars);
    }
14175
14176
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     With the default (null) the wrapped lines are re-joined with "\n";
     *                                     an empty string yields no lines at all.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        if ($delimiter === null) {
            $strings = \preg_split('/\\r\\n|\\r|\\n/', $str);
        } elseif ($delimiter === '') {
            // explode() rejects an empty separator (warning + false before PHP 8, ValueError
            // since PHP 8), so take the "nothing to split" path without calling it
            $strings = false;
        } else {
            $strings = \explode($delimiter, $str);
        }

        $string_helper_array = [];
        if ($strings !== false) {
            foreach ($strings as $value) {
                $string_helper_array[] = self::wordwrap($value, $width, $break, $cut);
            }
        }

        $final_break = $add_final_break ? $break : '';

        return \implode($delimiter ?? "\n", $string_helper_array) . $final_break;
    }
14229
14230
    /**
     * Returns the class-level list of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        return self::$WHITESPACE;
    }
14242
14243
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * When PCRE has UTF-8 support the check is delegated to preg_match() with the "u" modifier;
     * otherwise the bytes are validated with a hand-written state machine.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        if ($strict) {
            $is_binary = self::is_binary($str, true);

            // binary-looking input that is detected as UTF-16 or UTF-32 is rejected
            if (
                $is_binary
                &&
                (
                    self::is_utf16($str, false) !== false
                    ||
                    self::is_utf32($str, false) !== false
                )
            ) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // With the /u modifier PCRE refuses to match anything in a subject that is not
            // valid UTF-8, so matching a single leading character proves the whole string.
            return \preg_match('/^./us', $str) === 1;
        }

        $pending = 0; // continuation octets still expected for the current sequence
        $code_point = 0; // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the lead octet of a multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif ((0xE0 & $octet) === 0xC0) {
                    // Lead octet of a 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $pending = 1;
                    $sequence_length = 2;
                } elseif ((0xF0 & $octet) === 0xE0) {
                    // Lead octet of a 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $pending = 2;
                    $sequence_length = 3;
                } elseif ((0xF8 & $octet) === 0xF0) {
                    // Lead octet of a 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $pending = 3;
                    $sequence_length = 4;
                } elseif ((0xFC & $octet) === 0xF8) {
                    // Lead octet of a 5 octet sequence. Such a sequence is always illegal (either
                    // not the shortest form or outside 0-0x10FFFF). Instead of resynchronizing
                    // here, it is consumed to its end and rejected by the checks below.
                    $code_point = ($octet & 0x03) << 24;
                    $pending = 4;
                    $sequence_length = 5;
                } elseif ((0xFE & $octet) === 0xFC) {
                    // Lead octet of a 6 octet sequence, handled like the 5 octet case.
                    $code_point = ($octet & 1) << 30;
                    $pending = 5;
                    $sequence_length = 6;
                } else {
                    // Neither US-ASCII nor a legal lead octet.
                    return false;
                }

                continue;
            }

            // Inside a multi-octet sequence only a continuation octet (10xxxxxx) is acceptable.
            if ((0xC0 & $octet) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            // Legal continuation: merge its six payload bits at the right position.
            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending !== 0) {
                continue;
            }

            // The sequence is complete; $code_point now holds the decoded value.
            if (
                // From Unicode 3.1, non-shortest form is illegal
                ($sequence_length === 2 && $code_point < 0x0080)
                ||
                ($sequence_length === 3 && $code_point < 0x0800)
                ||
                ($sequence_length === 4 && $code_point < 0x10000)
                ||
                ($sequence_length > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($code_point > 0x10FFFF)
            ) {
                return false;
            }

            // reset for the next character
            $code_point = 0;
            $sequence_length = 1;
        }

        // a sequence that is still open at the end of the input is invalid
        return $pending === 0;
    }
14398
14399
    /**
     * Replaces case-specific characters using the class' case-folding tables.
     *
     * First the "common" table (self::$COMMON_CASE_FOLD) is applied, then - if requested -
     * the full table loaded from the "caseFolding_full" data file.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $use_lowercase
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * Loaded once per request and then reused.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is the search list for lowercasing, index 1 its replacement list;
        // the uppercase direction swaps the two
        if ($use_lowercase) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
14452
14453
    /**
     * Includes "/data/<file>.php" (relative to this class file) and returns the value of that include.
     *
     * NOTE(review): "$file" is concatenated into the path without validation - it must only ever
     * be an internal, hard-coded name.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
14471
14472
    /**
     * Lazily builds the emoji lookup caches.
     *
     * On the first call the emoji table is loaded (if necessary), sorted by key length with the
     * longest keys first, and the key / value / reversible-placeholder caches are filled.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>True if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                // descending by byte length
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            // placeholder derived from the key's checksum, followed by the same checksum reversed
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14507
14508
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              False when the MB_OVERLOAD_STRING constant does not exist, otherwise whether that
     *              bit is set in the "mbstring.func_overload" INI value.
     *              </p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overload_flags = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($overload_flags & \MB_OVERLOAD_STRING);
    }
14529
14530
    /**
     * Filters a list of strings and returns the remaining ones re-indexed from 0.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop strings that are empty after trim().</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop strings whose mb_strlen() is less than or equal to this
     *                                      value; null disables the length check.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: nothing is modified in place, so no reference is needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
14571
14572
    /**
     * Builds a regular-expression fragment that matches any of the characters in "$s".
     *
     * The characters are added to the character class "$class"; elements that cannot be part of
     * a character class are emitted as alternatives: "(?:[...]|alt|...)". Results are cached
     * per "$s" / "$class" combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the character class (without the brackets).</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 is the character class itself, further entries are alternatives
        /** @var string[] $class_array */
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" has to be the first character of the class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape possible regex meta characters
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single character of three or more bytes: add it as-is
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // NOTE: a class consisting only of "0" is falsy and therefore stays unbracketed
        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14630
14631
    /**
     * Capitalize the parts of a personal name while leaving name particles untouched.
     *
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * <p>The input is split on $delimiter. A part is kept unchanged when it</p>
     * <ul>
     *   <li>is exactly one of the known particles (e.g. "von", "de", "ibn"),</li>
     *   <li>starts with one of the known prefixes (e.g. "mc", "d'", "al-"), or</li>
     *   <li>starts with one of the particles and the delimiter is "-".</li>
     * </ul>
     * <p>Every other part is passed through self::ucfirst(); the parts are then
     * joined again with $delimiter.</p>
     *
     * @param string $names
     * @param string $delimiter <p>An empty delimiter cannot be split on and yields an empty string.</p>
     * @param string $encoding  <p>Accepted for interface compatibility; not used by this helper.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot split on an empty delimiter: it returned false (plus
        // a warning) before PHP 8 and throws a ValueError since PHP 8. Handle
        // that case up front so the result is '' on every version.
        if ($delimiter === '') {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that protect a part from being capitalized. For the "-"
        // delimiter the particles count as beginnings as well.
        $protected_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $protected_beginnings = \array_merge($special_cases['names'], $protected_beginnings);
        }

        $name_helper_array = \explode($delimiter, $names);

        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($protected_beginnings as $beginning) {
                // byte-wise prefix comparison
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    continue 2;
                }
            }

            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
14731
14732
    /**
     * Fold a string for collation matching by stripping its combining marks.
     *
     * <p>The string is decomposed to Unicode normalization form D and every run
     * of non-spacing marks (\p{Mn}) is removed afterwards.</p>
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The folded string, an empty string if the normalization failed
     *                     or null if the regex replacement failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        /** @phpstan-ignore-next-line - https://github.com/JetBrains/phpstorm-stubs/pull/949 */
        if ($decomposed === false) {
            // the input could not be normalized
            return '';
        }

        /** @noinspection PhpUndefinedClassInspection */
        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
14756
14757
    /**
     * Convert one input unit into its UTF-8 byte sequence.
     *
     * <p>The ordinal of $input is looked up in the "ord" table. If the
     * "win1252_to_utf8" table has an entry for that ordinal, the entry is
     * returned. Otherwise a two-byte sequence is assembled from the ordinal:
     * a lead byte (upper bits, OR-ed with 0xC0) followed by a continuation
     * byte (lower six bits, OR-ed with 0x80).</p>
     *
     * @param int|string $input <p>Must be a key of the "ord" lookup table (presumably a single byte).</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // lazy-load the lookup tables on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // "/" yields a float for non-multiples of 64; truncate explicitly
            // instead of relying on the implicit float-to-int conversion of
            // array keys (deprecated since PHP 8.1).
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // The bitwise operators are intentional: they work byte-wise on strings.
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
14796
14797
    /**
     * Rewrite "%uXXXX" escape sequences into hexadecimal HTML entities.
     *
     * <p>Every "%u" followed by three or four hex digits becomes "&#xXXXX;";
     * a string without such a sequence is returned unchanged.</p>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to rewrite
        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
14815
}
14816