Passed
Push — master ( 68aebc...f14ed6 )
by Lars
06:31
created

UTF8   F

Complexity

Total Complexity 1782

Size/Duplication

Total Lines 14814
Duplicated Lines 0 %

Test Coverage

Coverage 80.45%

Importance

Changes 108
Bugs 53 Features 6
Metric Value
eloc 4538
c 108
b 53
f 6
dl 0
loc 14814
ccs 3065
cts 3810
cp 0.8045
rs 0.8
wmc 1782

310 Methods

Rating   Name   Duplication   Size   Complexity  
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 23 5
A __construct() 0 2 1
B between() 0 48 8
A char_at() 0 7 2
A chars() 0 4 1
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
B chr_to_decimal() 0 38 8
A file_has_bom() 0 8 2
A filter_input() 0 16 3
A get_unique_string() 0 22 3
A encode_mimeheader() 0 26 5
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
A has_uppercase() 0 8 2
A isBinary() 0 3 1
A emoji_decode() 0 18 2
D chr() 0 109 19
A html_escape() 0 6 1
B get_file_type() 0 65 7
A chr_to_int() 0 3 1
A isHtml() 0 3 1
C filter() 0 57 12
A isBase64() 0 3 1
A decode_mimeheader() 0 8 3
A html_decode() 0 6 1
A isUtf32() 0 3 1
A chunk_split() 0 3 1
A emoji_encode() 0 18 2
A is_alpha() 0 8 2
B get_random_string() 0 56 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A css_identifier() 0 56 6
A isUtf8() 0 3 1
A css_stripe_media_queries() 0 6 1
A clean() 0 48 6
D getCharDirection() 0 105 118
A htmlspecialchars() 0 15 3
A filter_var_array() 0 15 2
A decimal_to_chr() 0 3 1
A has_whitespace() 0 8 2
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A intlChar_loaded() 0 3 1
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
F extract_text() 0 175 34
A isBom() 0 3 1
A int_to_chr() 0 3 1
A hasBom() 0 3 1
A iconv_loaded() 0 3 1
A isAscii() 0 3 1
A filter_var() 0 15 2
A isUtf16() 0 3 1
F encode() 0 147 37
A is_alphanumeric() 0 8 2
A fix_simple_utf8() 0 32 4
A checkForSupport() 0 48 4
A int_to_hex() 0 7 2
A has_lowercase() 0 8 2
A hex_to_int() 0 14 3
A htmlentities() 0 28 3
A hex_to_chr() 0 4 1
A isJson() 0 3 1
A filter_input_array() 0 15 3
A getSupportInfo() 0 13 3
A chr_to_hex() 0 11 3
A is_punctuation() 0 3 1
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 59 13
B file_get_contents() 0 56 11
A emoji_from_country_code() 0 17 3
A chr_size_list() 0 17 3
B html_encode() 0 54 11
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A is_utf8() 0 13 4
A lcword() 0 13 1
C is_utf16() 0 71 16
A is_html() 0 14 2
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A is_ascii() 0 3 1
A is_blank() 0 8 2
A lowerCaseFirst() 0 13 1
B is_binary() 0 38 9
A lcfirst() 0 44 5
B is_url() 0 44 7
A is_binary_file() 0 16 4
A json_loaded() 0 3 1
A is_lowercase() 0 8 2
A lcwords() 0 34 6
A is_empty() 0 3 1
C is_utf32() 0 71 16
A json_decode() 0 14 2
A is_printable() 0 3 1
B is_json() 0 27 8
A json_encode() 0 10 2
A is_base64() 0 17 5
A str_substr_after_first_separator() 0 28 6
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A str_contains() 0 15 3
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 61 13
A ltrim() 0 27 5
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 72 15
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A toUTF8() 0 3 1
B str_obfuscate() 0 47 8
A string() 0 16 4
D normalize_encoding() 0 147 16
B rxClass() 0 45 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 171 7
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strchr() 0 13 1
A strichr() 0 13 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 27 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 60 10
B urldecode() 0 51 8
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
A str_replace_first() 0 20 2
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A trim() 0 27 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 31 5
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 42 11
B str_contains_all() 0 24 9
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 72 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
B rawurldecode() 0 51 8
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 16 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 138 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
A remove_right() 0 25 4
F strrpos() 0 136 31
A remove_html_breaks() 0 3 1
A showSupport() 0 17 3
A remove_invisible_characters() 0 11 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 15 3
A to_utf8() 0 14 3
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A str_upper_first() 0 13 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C ord() 0 77 16
B to_string() 0 33 8
A strtonatfold() 0 13 2
C strcspn() 0 49 12
A fixStrCaseHelper() 0 41 5
B str_split_pattern() 0 49 11
D strstr() 0 107 21
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 138 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 60 10
A min() 0 14 3
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 7 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
21
     * Bom => Byte-Length
22
     *
23
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
24
     *
25
     * @var array<string, int>
26
     */
27
    private static $BOM = [
28
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
29
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
30
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
31
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
32
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
33
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
34
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
35
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
36
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
37
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
38
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @phpstan-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @phpstan-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @phpstan-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @phpstan-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @phpstan-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @phpstan-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @phpstan-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @phpstan-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @phpstan-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @phpstan-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @phpstan-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @phpstan-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
247
     * Empty constructor: the body contains no statements, so creating an instance performs no work.
248
     */
249 34
    public function __construct()
250
    {
251 34
    }
252
253
    /**
     * Fetch a single character by its character index, similar to $str[1] for plain byte strings.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>Zero-based character position; negative values yield an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string for empty input / negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $is_addressable = $str !== '' && $pos >= 0;
        if (!$is_addressable) {
            return '';
        }

        // "UTF-8" takes the direct mbstring route, every other charset goes through UTF8::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
279
280
    /**
     * Ensure the string starts with the UTF-8 BOM.
     *
     * INFO: A string for which UTF8::string_has_bom() reports a BOM is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input string, prefixed with the BOM when it did not have one.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
302
303
    /**
     * Changes the case of all keys in an array; the values are copied over untouched.
     *
     * INFO: keys that become identical after the case conversion overwrite each other (the last one wins).
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other value
     *                                       is treated as <strong>CASE_LOWER</strong></p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Everything except an explicit CASE_UPPER falls back to lower-casing.
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: nothing is written back into $array, so a by-reference
        // loop would only leave a dangling reference behind.
        foreach ($array as $key => $value) {
            $key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
340
341
    /**
     * Extracts the text located between the first $start delimiter (searched from $offset)
     * and the next $end delimiter after it. Returns an empty string when either delimiter
     * is missing or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // The literal "UTF-8" uses mbstring directly; anything else is normalized first
        // and then handled by the encoding-aware helpers of this class.
        $use_mbstring = $encoding === 'UTF-8';
        if (!$use_mbstring) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $start_position = $use_mbstring
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($start_position === false) {
            return '';
        }

        $start_length = $use_mbstring
            ? \mb_strlen($start)
            : self::strlen($start, $encoding);
        $substr_index = $start_position + (int) $start_length;

        $end_position = $use_mbstring
            ? \mb_strpos($str, $end, $substr_index)
            : self::strpos($str, $end, $substr_index, $encoding);
        if ($end_position === false || $end_position === $substr_index) {
            return '';
        }

        $length = $end_position - $substr_index;

        if ($use_mbstring) {
            return (string) \mb_substr($str, $substr_index, $length);
        }

        return (string) self::substr($str, $substr_index, $length, $encoding);
    }
406
407
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * The bit string is decoded octet by octet, so inputs of any length are converted
     * without precision loss. Leading zero bits are ignored (an all-zero input yields
     * an empty string) and the remaining bits are left-padded to a full octet.
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // Keep only binary digits and drop leading zeros, so an all-zero input
        // still results in an empty string.
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to a whole number of octets: a value like "1010" must become
        // the byte 0x0A, not 0xA0.
        $padded_length = (int) (\ceil(\strlen($bits) / 8) * 8);
        $bits = \str_pad($bits, $padded_length, '0', \STR_PAD_LEFT);

        // Decode each octet on its own instead of converting the whole number at once,
        // which loses precision for long inputs.
        $result = '';
        foreach (\str_split($bits, 8) as $octet) {
            $result .= \chr((int) \bindec($octet));
        }

        return $result;
    }
433
434
    /**
435
     * Returns the three-byte UTF-8 Byte Order Mark as a string.
436
     *
437
     * INFO: the private static UTF8::$BOM table lists further marks, e.g. the UTF-16 and UTF-32 BOM values
438
     *
439
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
440
     *
441
     * @psalm-pure
442
     *
443
     * @return string
444
     *                <p>UTF-8 Byte Order Mark ("\xEF\xBB\xBF").</p>
445
     */
446 4
    public static function bom(): string
447
    {
448 4
        return "\xef\xbb\xbf";
449
    }
450
451
    /**
452
     * @alias of UTF8::chr_map() - forwards both arguments unchanged and returns its result
453
     *
454
     * @param callable $callback <p>Passed through to UTF8::chr_map().</p>
455
     * @param string   $str      <p>Passed through to UTF8::chr_map().</p>
456
     *
457
     * @psalm-pure
458
     *
459
     * @return string[] <p>Whatever UTF8::chr_map() returns for the given arguments.</p>
460
     *
461
     * @see   UTF8::chr_map()
462
     */
463 2
    public static function callback($callback, string $str): array
464
    {
465 2
        return self::chr_map($callback, $str);
466
    }
467
468
    /**
     * Picks the single character located at $index (zero-based).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Only the literal "UTF-8" is served by mbstring directly.
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
488
489
    /**
     * Splits the string into its characters via UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
504
505
    /**
     * Probes the server environment once and records the results in self::$SUPPORT.
     *
     * @return true|null
     *                   <p>true when the probing was executed by this call, null when it had already been done.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // The marker below is set on the first run, every later call is a no-op.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
561
562
    /**
563
     * Generates a UTF-8 encoded character from the given code point.
564
     *
565
     * INFO: opposite to UTF8::ord()
566
     *
567
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
568
     *
569
     * @param int    $code_point <p>The code point for which to generate a character.</p>
570
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
571
     *
572
     * @psalm-pure
573
     *
574
     * @return string|null
575
     *                     <p>Multi-byte character, returns null on failure or empty input.</p>
576
     */
577 21
    public static function chr($code_point, string $encoding = 'UTF-8')
578
    {
579
        // init
580
        /**
581
         * @psalm-suppress ImpureStaticVariable
582
         *
583
         * @var array<string,string>
584
         */
585 21
        static $CHAR_CACHE = [];
586
587 21
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
588 5
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
589
        }
590
591
        /** @noinspection InArrayCanBeUsedInspection */
592
        if (
593 21
            $encoding !== 'UTF-8'
594
            &&
595 21
            $encoding !== 'ISO-8859-1'
596
            &&
597 21
            $encoding !== 'WINDOWS-1252'
598
            &&
599 21
            self::$SUPPORT['mbstring'] === false
600
        ) {
601
            /**
602
             * @psalm-suppress ImpureFunctionCall - is is only a warning
603
             */
604
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
605
        }
606
607 21
        if (!\is_int($code_point) || $code_point <= 0) {
0 ignored issues
show
introduced by
The condition is_int($code_point) is always true.
Loading history...
608 5
            return null;
609
        }
610
611 21
        $cache_key = $code_point . '_' . $encoding;
612 21
        if (isset($CHAR_CACHE[$cache_key])) {
613 19
            return $CHAR_CACHE[$cache_key];
614
        }
615
616 10
        if ($code_point <= 0x80) { // only for "simple"-chars
617
618 9
            if (self::$CHR === null) {
619
                self::$CHR = self::getData('chr');
620
            }
621
622
            /**
623
             * @psalm-suppress PossiblyNullArrayAccess
624
             */
625 9
            $chr = self::$CHR[$code_point];
626
627 9
            if ($encoding !== 'UTF-8') {
628 1
                $chr = self::encode($encoding, $chr);
629
            }
630
631 9
            return $CHAR_CACHE[$cache_key] = $chr;
632
        }
633
634
        //
635
        // fallback via "IntlChar"
636
        //
637
638 6
        if (self::$SUPPORT['intlChar'] === true) {
639
            /** @noinspection PhpComposerExtensionStubsInspection */
640 6
            $chr = \IntlChar::chr($code_point);
641
642 6
            if ($encoding !== 'UTF-8') {
643
                $chr = self::encode($encoding, $chr);
644
            }
645
646 6
            return $CHAR_CACHE[$cache_key] = $chr;
647
        }
648
649
        //
650
        // fallback via vanilla php
651
        //
652
653
        if (self::$CHR === null) {
654
            self::$CHR = self::getData('chr');
655
        }
656
657
        $code_point = (int) $code_point;
658
        if ($code_point <= 0x7FF) {
659
            /**
660
             * @psalm-suppress PossiblyNullArrayAccess
661
             */
662
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
663
                   self::$CHR[($code_point & 0x3F) + 0x80];
664
        } elseif ($code_point <= 0xFFFF) {
665
            /**
666
             * @psalm-suppress PossiblyNullArrayAccess
667
             */
668
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
669
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
670
                   self::$CHR[($code_point & 0x3F) + 0x80];
671
        } else {
672
            /**
673
             * @psalm-suppress PossiblyNullArrayAccess
674
             */
675
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
676
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
677
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
678
                   self::$CHR[($code_point & 0x3F) + 0x80];
679
        }
680
681
        if ($encoding !== 'UTF-8') {
682
            $chr = self::encode($encoding, $chr);
683
        }
684
685
        return $CHAR_CACHE[$cache_key] = $chr;
686
    }
687
688
    /**
689
     * Applies callback to all characters of a string.
690
     *
691
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
692
     *
693
     * @param callable $callback <p>The callback function.</p>
694
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
695
     *
696
     * @psalm-pure
697
     *
698
     * @return string[]
699
     *                  <p>The outcome of the callback, as array.</p>
700
     */
701 2
    public static function chr_map($callback, string $str): array
    {
        // Split the input into its characters via UTF8::str_split() ...
        $characters = self::str_split($str);

        // ... and run the callback once per character, collecting the results in order.
        return \array_map($callback, $characters);
    }
708
709
    /**
710
     * Generates an array of byte length of each character of a Unicode string.
711
     *
712
     * 1 byte => U+0000  - U+007F
713
     * 2 byte => U+0080  - U+07FF
714
     * 3 byte => U+0800  - U+FFFF
715
     * 4 byte => U+10000 - U+10FFFF
716
     *
717
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
718
     *
719
     * @param string $str <p>The original unicode string.</p>
720
     *
721
     * @psalm-pure
722
     *
723
     * @return int[]
724
     *               <p>An array of byte lengths of each character.</p>
725
     */
726 4
    public static function chr_size_list(string $str): array
    {
        // No characters, no sizes.
        if ($str === '') {
            return [];
        }

        // Pick the byte counter: with "mbstring.func_overload" active the plain
        // strlen() is not used; mb_strlen() with an 8-bit charset is used instead.
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byte_counter = static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_counter = '\strlen';
        }

        // Measure every character returned by UTF8::str_split().
        return \array_map($byte_counter, self::str_split($str));
    }
744
745
    /**
746
     * Get a decimal code representation of a specific character.
747
     *
748
     * INFO: opposite to UTF8::decimal_to_chr()
749
     *
750
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
751
     *
752
     * @param string $char <p>The input character.</p>
753
     *
754
     * @psalm-pure
755
     *
756
     * @return int
757
     */
758 5
    public static function chr_to_decimal(string $char): int
    {
        // An empty string carries no character: return 0 instead of running into
        // a failing unpack() (iconv path) or an invalid string offset (fallback path).
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            // unpack('V', ...) needs at least four bytes; only use the iconv result
            // when it is long enough, otherwise continue with the manual decoding.
            if ($chr_tmp !== false && isset($chr_tmp[3])) {
                $unpacked = \unpack('V', $chr_tmp);
                if ($unpacked !== false) {
                    return $unpacked[1];
                }
            }
        }

        //
        // fallback: decode the first UTF-8 sequence by hand
        //

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            // A truncated sequence has no byte at this offset: count it as 0
            // instead of reading past the end of the string.
            $byte = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
            $code = ($code << 6) + ($byte & ~0x80);
        }

        return $code;
    }
797
798
    /**
799
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
800
     *
801
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
802
     *
803
     * @param int|string $char   <p>The input character</p>
804
     * @param string     $prefix [optional]
805
     *
806
     * @psalm-pure
807
     *
808
     * @return string
809
     *                <p>The code point encoded as U+xxxx.</p>
810
     */
811 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        // Nothing to convert.
        if ($char === '') {
            return '';
        }

        // The "&#0;" entity is replaced by an empty string before the lookup.
        $input = $char === '&#0;' ? '' : $char;

        // Resolve the code point first, then format it with the requested prefix.
        $code_point = self::ord((string) $input);

        return self::int_to_hex($code_point, $prefix);
    }
823
824
    /**
825
     * alias for "UTF8::chr_to_decimal()"
826
     *
827
     * @param string $chr
828
     *
829
     * @psalm-pure
830
     *
831
     * @return int
832
     *
833
     * @see        UTF8::chr_to_decimal()
834
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
835
     */
836 2
    public static function chr_to_int(string $chr): int
    {
        // Deprecated alias: hand the character over to chr_to_decimal() unchanged.
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
840
841
    /**
842
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
843
     *
844
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
845
     *
846
     * @param string $body         <p>The original string to be split.</p>
847
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
848
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
849
     *
850
     * @psalm-pure
851
     *
852
     * @return string
853
     *                <p>The chunked string.</p>
854
     */
855 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // Cut the body into pieces of $chunk_length via UTF8::str_split() ...
        $chunks = self::str_split($body, $chunk_length);

        // ... and glue them together with the line ending placed between the chunks.
        return \implode($end, $chunks);
    }
859
860
    /**
861
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
862
     *
863
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
864
     *
865
     * @param string $str                                     <p>The string to be sanitized.</p>
866
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
867
     *                                                        UTF-BOM.</p>
868
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
869
     *                                                        whitespace.</p>
870
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
871
     *                                                        Word chars e.g.: "…"
872
     *                                                        => "..."</p>
873
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
874
     *                                                        in
875
     *                                                        combination with
876
     *                                                        $normalize_whitespace</p>
877
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
878
     *                                                        question mark e.g.: "�"</p>
879
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
880
     *                                                        invisible characters e.g.: "\0"</p>
881
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you want to remove
882
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
883
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
884
     *                                                        </p>
885
     *
886
     * @psalm-pure
887
     *
888
     * @return string
889
     *                <p>A clean UTF-8 encoded string.</p>
890
     *
891
     * @noinspection PhpTooManyParametersInspection
892
     */
893 90
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; stray bytes land in
        // group 2 or 3. Replacing every match by "$1" keeps the runs and drops the rest.
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8_filter, '$1', $str);

        // The optional steps below run in this fixed order, each one only if requested.
        $cleaned = $replace_diamond_question_mark
            ? self::replace_diamond_question_mark($cleaned)
            : $cleaned;

        $cleaned = $remove_invisible_characters
            ? self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded)
            : $cleaned;

        $cleaned = $normalize_whitespace
            ? self::normalize_whitespace($cleaned, $keep_non_breaking_space)
            : $cleaned;

        $cleaned = $normalize_msword
            ? self::normalize_msword($cleaned)
            : $cleaned;

        $cleaned = $remove_bom
            ? self::remove_bom($cleaned)
            : $cleaned;

        return $cleaned;
    }
942
943
    /**
944
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
945
     *
946
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
947
     *
948
     * @param string $str <p>The input string.</p>
949
     *
950
     * @psalm-pure
951
     *
952
     * @return string
953
     */
954 33
    public static function cleanup($str): string
    {
        // The parameter is untyped, so force a string before doing anything else.
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // Flags handed to clean(), in parameter order; the remaining
        // parameters of clean() keep their defaults.
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;

        return self::clean(
            $fixed,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark
        );
    }
980
981
    /**
982
     * Accepts a string or an array of strings and returns an array of Unicode code points.
983
     *
984
     * INFO: opposite to UTF8::string()
985
     *
986
     * EXAMPLE: <code>
987
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
988
     * // ... OR ...
989
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
990
     * </code>
991
     *
992
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
993
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
994
     *                                     default, code points will be returned as integers.</p>
995
     *
996
     * @psalm-pure
997
     *
998
     * @return int[]|string[]
999
     *                        <p>
1000
     *                        The array of code points:<br>
1001
     *                        int[] for $use_u_style === false<br>
1002
     *                        string[] for $use_u_style === true<br>
1003
     *                        </p>
1004
     */
1005 12
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A plain string is split into characters first; anything else is taken as given.
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            // Neither a string nor an array, or simply nothing to convert.
            return [];
        }

        // Every entry becomes its integer code point ...
        $code_points = \array_map([self::class, 'ord'], $chars);

        if (!$use_u_style) {
            return $code_points;
        }

        // ... and, on request, is formatted by int_to_hex() afterwards.
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1042
1043
    /**
1044
     * Trims the string and replaces consecutive whitespace characters with a
1045
     * single space. This includes tabs and newline characters, as well as
1046
     * multibyte whitespace such as the thin space and ideographic space.
1047
     *
1048
     * @param string $str <p>The input string.</p>
1049
     *
1050
     * @psalm-pure
1051
     *
1052
     * @return string
1053
     *                <p>A string with trimmed $str and condensed whitespace.</p>
1054
     */
1055 13
    public static function collapse_whitespace(string $str): string
    {
        // Every run of whitespace is replaced by one regular space.
        $whitespace_run = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace($whitespace_run, ' ', $str);
        } else {
            // Without mbstring the library's own regex helper does the replacement.
            $collapsed = self::regex_replace($str, $whitespace_run, ' ');
        }

        // Finally strip what is left at both ends.
        return \trim($collapsed);
    }
1064
1065
    /**
1066
     * Returns count of characters used in a string.
1067
     *
1068
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
1069
     *
1070
     * @param string $str                     <p>The input string.</p>
1071
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
1072
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
1073
     *
1074
     * @psalm-pure
1075
     *
1076
     * @return int[]
1077
     *               <p>An associative array of Character as keys and
1078
     *               their count as values.</p>
1079
     */
1080 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into chunks of length 1, forwarding both option flags to str_split() ...
        $single_chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... and count how often each distinct chunk occurs.
        return \array_count_values($single_chars);
    }
1094
1095
    /**
1096
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
1097
     *
1098
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
1099
     *
1100
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
1101
     *
1102
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
1103
     * @param string[] $filter
1104
     * @param bool     $strip_tags
1105
     * @param bool     $strtolower
1106
     *
1107
     * @psalm-pure
1108
     *
1109
     * @return string
1110
     *
1111
     * @phpstan-param array<string,string> $filter
1112
     */
1113 1
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets a generated identifier, everything else is cleaned first.
        $identifier = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $identifier = \strip_tags($identifier);
        }

        if ($strtolower) {
            $identifier = \strtolower($identifier);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $placeholder_hits = 0;
        if (!isset($filter['__'])) {
            $identifier = \str_replace('__', '##', $identifier, $placeholder_hits);
        }

        // Apply the caller's filter map (search => replacement).
        /* @noinspection ArrayValuesMissUseInspection */
        $identifier = \str_replace(\array_keys($filter), \array_values($filter), $identifier);

        // Replace temporary placeholder '##' with '__' only if the original
        // identifier contained '__'.
        if ($placeholder_hits > 0) {
            $identifier = \str_replace('##', '__', $identifier);
        }

        // Characters kept by the pattern below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 up to U+FFFF
        // Any character not in the above list is stripped.
        $identifier = (string) \preg_replace(
            '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u',
            '',
            $identifier
        );

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $invalid_starts = ['/^[0-9]/', '/^(-[0-9])|^(--)/'];
        $start_replacements = ['_', '__'];
        $identifier = (string) \preg_replace($invalid_starts, $start_replacements, $identifier);

        // Hyphens left at either end are removed.
        return \trim($identifier, '-');
    }
1170
1171
    /**
1172
     * Remove css media-queries.
1173
     *
1174
     * @param string $str
1175
     *
1176
     * @psalm-pure
1177
     *
1178
     * @return string
1179
     */
1180 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches an "@media ..." rule including its nested "{ ... }" body.
        // The "U" modifier inverts greediness, so each match ends at the first closing "} }" pair.
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        // preg_replace() yields null on error; the cast turns that into an empty string.
        return (string) $without_media_queries;
    }
1188
1189
    /**
1190
     * Checks whether ctype is available on the server.
1191
     *
1192
     * @psalm-pure
1193
     *
1194
     * @return bool
1195
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
1196
     *
1197
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
1198
     */
1199
    public static function ctype_loaded(): bool
    {
        // Ask PHP directly whether the "ctype" extension is loaded.
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1203
1204
    /**
1205
     * Converts an int value into a UTF-8 character.
1206
     *
1207
     * INFO: opposite to UTF8::chr_to_decimal()
1208
     *
1209
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
1210
     *
1211
     * @param int|string $int
1212
     *
1213
     * @phpstan-param int|numeric-string $int
1214
     *
1215
     * @psalm-pure
1216
     *
1217
     * @return string
1218
     */
1219 20
    public static function decimal_to_chr($int): string
    {
        // Build the numeric character reference for the value (e.g. "&#931;") ...
        $entity = '&#' . $int . ';';
        $flags = \ENT_QUOTES | \ENT_HTML5;

        // ... and let html_entity_decode() turn it into the character.
        return self::html_entity_decode($entity, $flags);
    }
1223
1224
    /**
1225
     * Decodes a MIME header field
1226
     *
1227
     * @param string $str
1228
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1229
     *
1230
     * @psalm-pure
1231
     *
1232
     * @return false|string
1233
     *                      <p>A decoded MIME field on success,
1234
     *                      or false if an error occurs during the decoding.</p>
1235
     */
1236 2
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // 'UTF-8' and 'CP850' are passed through untouched; any other name is
        // normalized first (with 'UTF-8' as the fallback).
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        $mode = \ICONV_MIME_DECODE_CONTINUE_ON_ERROR;

        return \iconv_mime_decode($str, $mode, $encoding);
    }
1245
1246
    /**
1247
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
1248
     *
1249
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
1250
     *
1251
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
1252
     *
1253
     * @return string
1254
     *                <p>Emoji or empty string on error.</p>
1255
     */
1256 1
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // A two-letter country code consists of exactly two ASCII letters.
        // Everything else (empty string, wrong length, digits, punctuation,
        // multi-byte characters) is an error and yields the documented empty
        // string instead of characters built from meaningless code points.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        // Code point that 'A' is mapped to; 'B' .. 'Z' follow consecutively.
        $flagOffset = 0x1F1E6;
        // Byte value of 'A'.
        $asciiOffset = 0x41;

        $emoji = '';
        foreach ([0, 1] as $index) {
            // Clearing bit 0x20 upper-cases an ASCII letter independent of any locale.
            $letter = \ord($country_code_iso_3166_1[$index]) & 0xDF;

            // chr() may return null; treat that as "no character".
            $emoji .= self::chr($letter - $asciiOffset + $flagOffset) ?? '';
        }

        return $emoji;
    }
1274
1275
    /**
1276
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1277
     *
1278
     * INFO: opposite to UTF8::emoji_encode()
1279
     *
1280
     * EXAMPLE: <code>
1281
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
1282
     * //
1283
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
1284
     * </code>
1285
     *
1286
     * @param string $str                            <p>The input string.</p>
1287
     * @param bool   $use_reversible_string_mappings [optional] <p>
1288
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1289
     *                                               between "emoji_encode" and "emoji_decode".</p>
1290
     *
1291
     * @psalm-pure
1292
     *
1293
     * @return string
1294
     */
1295 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Make sure the emoji lookup caches are initialized.
        self::initEmojiData();

        // Choose the key list that matches the mapping used while encoding.
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Swap every key found in the string for the value at the same position.
        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1315
1316
    /**
1317
     * Encode a string with emoji chars into a non-emoji string.
1318
     *
1319
     * INFO: opposite to UTF8::emoji_decode()
1320
     *
1321
     * EXAMPLE: <code>
1322
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
1323
     * //
1324
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
1325
     * </code>
1326
     *
1327
     * @param string $str                            <p>The input string</p>
1328
     * @param bool   $use_reversible_string_mappings [optional] <p>
1329
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1330
     *                                               between "emoji_encode" and "emoji_decode"</p>
1331
     *
1332
     * @psalm-pure
1333
     *
1334
     * @return string
1335
     */
1336 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Make sure the emoji lookup caches are initialized.
        self::initEmojiData();

        // Choose which key list the values are replaced with.
        $replacement_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Swap every cached value found in the string for the key at the same position.
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacement_keys,
            $str
        );
    }
1356
1357
    /**
1358
     * Encode a string with a new charset-encoding.
1359
     *
1360
     * INFO:  This function will also try to fix broken / double encoding,
1361
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1362
     *
1363
     * EXAMPLE: <code>
1364
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
1365
     * //
1366
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
1367
     * //
1368
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
1369
     * //
1370
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
1371
     * </code>
1372
     *
1373
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1374
     * @param string $str                           <p>The input string</p>
1375
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1376
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1377
     *                                              string-encoding</p>
1378
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1379
     *                                              An empty string will trigger the autodetect anyway.</p>
1380
     *
1381
     * @psalm-pure
1382
     *
1383
     * @return string
1384
     *
1385
     * @psalm-suppress InvalidReturnStatement
1386
     */
1387 29
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert, or no target encoding given.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are used as-is; every other name is normalized first.
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Source and target are explicitly the same: no work needed.
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        //
        // pseudo encodings: JSON, BASE64, HTML-ENTITIES
        //

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            // Only a decoded string can be converted any further; for every
            // other decoded value the input is kept as it is.
            $decoded = self::json_decode($str);
            if (\is_string($decoded)) {
                $str = $decoded;
            }
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // Strict decoding returns false for invalid input; keep the
            // original string in that case instead of passing false along.
            $decoded = \base64_decode($str, true);
            if ($decoded !== false) {
                $str = $decoded;
            }
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        //
        // determine the source encoding
        //

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $from_encoding_auto_detected, $str, "\n\n");

        if ($from_encoding_auto_detected !== false) {
            // A successful detection replaces the given source encoding.
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        //
        // conversions handled by the library's own helpers
        //

        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        //
        // generic conversion: mbstring first, iconv as fallback
        //

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            // Compare explicitly so that a valid result such as "0" is not
            // mistaken for a failed conversion.
            if (\is_string($str_encoded) && $str_encoded !== '') {
                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $return = @\iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        // Last resort: hand back the string unconverted.
        return $str;
    }
1535
1536
    /**
     * Encodes a string as a MIME header value via "iconv_mime_encode()".
     *
     * The field name handed to "iconv_mime_encode()" is always an empty string,
     * only the given value is encoded.
     *
     * @param string $str               <p>The header value that should be encoded.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
     * @param int    $indent            [optional] <p>Set the max line length (passed as "line-length").</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as they are, every other charset name is normalized first
        $charsets_used_as_is = ['UTF-8', 'CP850'];

        if (!\in_array($from_charset, $charsets_used_as_is, true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, $charsets_used_as_is, true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1579
1580
    /**
     * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
     *
     * The text is only cut at a space or a period, the characters listed in $trim_chars are trimmed
     * from the cut edges and every skipped part is replaced by $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // ---------------------------------------------------------------
        // no search-string: keep the beginning of the text
        // ---------------------------------------------------------------

        if ($search === '') {
            if ($length > 0) {
                $string_length = self::extract_text_strlen($str, $encoding);
                $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = self::extract_text_boundary_after($str, $end, $encoding);
            if (!$pos) {
                // no usable cut position: return the text unchanged
                return $str;
            }

            $str_sub = self::extract_text_substr($str, 0, $pos, $encoding);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---------------------------------------------------------------
        // with search-string: try to center the extract around the match
        // ---------------------------------------------------------------

        // a missing match and a match at position 0 both end up as 0 here
        if ($encoding === 'UTF-8') {
            $word_position = (int) \mb_stripos($str, $search);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
        }

        $half_side = (int) ($word_position - $length / 2 + self::extract_text_strlen($search, $encoding) / 2);

        // start the extract at the last space / period in front of the centered window
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = self::extract_text_substr($str, 0, $half_side, $encoding);
            if ($half_text !== false) {
                if ($encoding === 'UTF-8') {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        $str_length = (int) self::strlen($str, $encoding);

        if ($word_position && $half_side > 0) {
            // the extract starts somewhere inside of the text
            $offset = $pos_start + $length - 1;
            if ($offset > $str_length) {
                $offset = $str_length;
            }

            $pos_end = self::extract_text_boundary_after($str, $offset, $encoding) - $pos_start;

            if ($pos_end <= 0) {
                // no cut position behind the window: keep everything up to the end of the text
                $str_sub = self::extract_text_substr(
                    $str,
                    $pos_start,
                    self::extract_text_strlen($str, $encoding),
                    $encoding
                );

                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
                } else {
                    $extract = '';
                }
            } else {
                $str_sub = self::extract_text_substr($str, $pos_start, $pos_end, $encoding);

                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
                } else {
                    $extract = '';
                }
            }

            return $extract;
        }

        // the extract starts at the beginning of the text
        $offset = $length - 1;
        if ($offset > $str_length) {
            $offset = $str_length;
        }

        $pos_end = self::extract_text_boundary_after($str, $offset, $encoding);
        if (!$pos_end) {
            return $str;
        }

        $str_sub = self::extract_text_substr($str, 0, $pos_end, $encoding);
        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }

    /**
     * Helper for "extract_text()": the smaller position of the next space and the next period,
     * both searched from $offset.
     *
     * NOTE: a "false" from one of the searches wins inside of "\min()" (PHP compares bool against int as bool),
     *       so the result is 0 as soon as one of the two characters is not found.
     *
     * @param string $str
     * @param int    $offset
     * @param string $encoding <p>Already normalized encoding.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    private static function extract_text_boundary_after(string $str, int $offset, string $encoding): int
    {
        if ($encoding === 'UTF-8') {
            return (int) \min(
                \mb_strpos($str, ' ', $offset),
                \mb_strpos($str, '.', $offset)
            );
        }

        return (int) \min(
            self::strpos($str, ' ', $offset, $encoding),
            self::strpos($str, '.', $offset, $encoding)
        );
    }

    /**
     * Helper for "extract_text()": "mb_substr()" for UTF-8, "UTF8::substr()" for every other encoding.
     *
     * @param string $str
     * @param int    $start
     * @param int    $length
     * @param string $encoding <p>Already normalized encoding.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     */
    private static function extract_text_substr(string $str, int $start, int $length, string $encoding)
    {
        if ($encoding === 'UTF-8') {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }

    /**
     * Helper for "extract_text()": "mb_strlen()" for UTF-8, "UTF8::strlen()" for every other encoding.
     *
     * @param string $str
     * @param string $encoding <p>Already normalized encoding.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    private static function extract_text_strlen(string $str, string $encoding): int
    {
        if ($encoding === 'UTF-8') {
            return (int) \mb_strlen($str);
        }

        return (int) self::strlen($str, $encoding);
    }
1769
1770
    /**
     * Reads entire file into a string and (optionally) converts the content to UTF-8.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. The name is passed through
     *                                        "filter_var()" with FILTER_SANITIZE_STRING before it is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed as second argument to the native "file_get_contents()"
     *                                        (search the file in the include path).
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If you don't need to use a
     *                                        custom context, you can skip this parameter by &null;.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts, null is handled as 0.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout. It is only used if no
     *                                        $context is given, then it is set as "http" => "timeout" option
     *                                        of a new stream context.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so file names containing
        //               such characters are changed before the read; the filter is deprecated as of PHP 8.1.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($filename === false) {
            return false;
        }

        // without a custom context, the timeout is applied via a new "http" stream context
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // the length argument is only passed on if it was really given
        $data = \is_int($max_length)
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $max_length)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // read error (false) or no conversion wanted: return the result as it is
        if ($data === false || !$convert_to_utf8) {
            return $data;
        }

        // binary data is only converted if it is detected as UTF-16 or UTF-32
        $is_convertible = !self::is_binary($data, true)
                          ||
                          self::is_utf16($data, false) !== false
                          ||
                          self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::encode('UTF-8', $data, false, $from_encoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1872
1873
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * The whole file is read via "file_get_contents()" and then checked via "UTF8::string_has_bom()".
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1896
1897
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively, value by value. For strings, a "\r" triggers
     * "UTF8::normalize_line_ending()" and a leading combining mark gets $leading_combining as prefix.
     * Every other type is returned as it is.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var
     * @param int                 $normalization_form <p>One of the "\Normalizer" form constants.</p>
     * @param string              $leading_combining  <p>Prefix for strings that start with a combining mark,
     *                                                an empty string disables the prefix.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                // "-" is only a non-empty marker for "the string was already normalized"
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // no usable normalization result: convert the input to UTF-8 instead
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $has_leading_combining_mark = $var[0] >= "\x80"
                                              &&
                                              isset($normalized[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);

                if ($has_leading_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1972
1973
    /**
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it,
     * the result is passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      With null, the native function is called without this argument.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        // the options are only passed on to the native function if they were really given
        $var = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($var);
    }
2028
2029
    /**
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them,
     * the result is passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('foo' => 'bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int            $type       <p>
     *                                   One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                   <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                   <b>INPUT_ENV</b>.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a filter type, or an
     *                                   array optionally specifying the filter, flags and options. If the value is an
     *                                   array, valid keys are filter which specifies the
     *                                   filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all
     *                                   values in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   With null, the native function is called with $type only.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   It is only passed on together with a $definition.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        // the definition (and $add_empty) is only passed on to the native function if it was really given
        $a = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($a);
    }
2090
2091
    /**
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter, the result is passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        With null (explicit or by default), the native function is called
     *                                        without this argument.
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                            'options' => array(
     *                                                'default' => 3, // value to return if the filter fails
     *                                                // other options here
     *                                                'min_range' => 0
     *                                            ),
     *                                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                            array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                            // Expected format: Surname, GivenNames
     *                                            if (strpos($value, ", ") === false) return false;
     *                                            list($surname, $givennames) = explode(", ", $value, 2);
     *                                            $empty = (empty($surname) || empty($givennames));
     *                                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                            if ($empty || $notstrings) {
     *                                                return false;
     *                                            } else {
     *                                                return $value;
     *                                            }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // never hand over null as $options: the native parameter is not nullable
            // (deprecated as of PHP 8.1), same handling as in "UTF8::filter_input()"
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2170
2171
    /**
2172
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2173
     *
2174
     * Gets multiple variables and optionally filters them.
2175
     *
2176
     * EXAMPLE: <code>
2177
     * $filters = [
2178
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2179
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2180
     *     'email' => FILTER_VALIDATE_EMAIL,
2181
     * ];
2182
     *
2183
     * $data = [
2184
     *     'name' => 'κόσμε',
2185
     *     'age' => '18',
2186
     *     'email' => '[email protected]'
2187
     * ];
2188
     *
2189
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => '[email protected]']
2190
     * </code>
2191
     *
2192
     * @see http://php.net/manual/en/function.filter-var-array.php
2193
     *
2194
     * @param array<mixed>   $data       <p>
2195
     *                                   An array with string keys containing the data to filter.
2196
     *                                   </p>
2197
     * @param array|int|null $definition [optional] <p>
2198
     *                                   An array defining the arguments. A valid key is a string
2199
     *                                   containing a variable name and a valid value is either a
2200
     *                                   filter type, or an
2201
     *                                   array optionally specifying the filter, flags and options.
2202
     *                                   If the value is an array, valid keys are filter
2203
     *                                   which specifies the filter type,
2204
     *                                   flags which specifies any flags that apply to the
2205
     *                                   filter, and options which specifies any options that
2206
     *                                   apply to the filter. See the example below for a better understanding.
2207
     *                                   </p>
2208
     *                                   <p>
2209
     *                                   This parameter can be also an integer holding a filter constant. Then all values
2210
     *                                   in the input array are filtered by this filter.
2211
     *                                   </p>
2212
     * @param bool           $add_empty  [optional] <p>
2213
     *                                   Add missing keys as <b>NULL</b> to the return value.
2214
     *                                   </p>
2215
     *
2216
     * @psalm-pure
2217
     *
2218
     * @return mixed
2219
     *               <p>
2220
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2221
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2222
     *               set.
2223
     *               </p>
2224
     */
2225 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * Forward the optional arguments only when the caller supplied at least two arguments,
         * otherwise call the native function with the data alone.
         *
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        // pass the native result through "UTF8::filter()" before returning it
        return self::filter($filtered);
    }
2241
2242
    /**
2243
     * Checks whether finfo is available on the server.
2244
     *
2245
     * @psalm-pure
2246
     *
2247
     * @return bool
2248
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2249
     *
2250
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
2251
     */
2252
    public static function finfo_loaded(): bool
    {
        // true when a class named "finfo" can be resolved
        $has_finfo = \class_exists('finfo');

        return $has_finfo;
    }
2256
2257
    /**
2258
     * Returns the first $n characters of the string.
2259
     *
2260
     * @param string $str      <p>The input string.</p>
2261
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2262
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2263
     *
2264
     * @psalm-pure
2265
     *
2266
     * @return string
2267
     */
2268 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to extract for a non-positive count or an empty input
        if ($n <= 0 || $str === '') {
            return '';
        }

        // "UTF-8" uses the native mbstring call directly, every other
        // encoding is delegated to "UTF8::substr()"
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2283
2284
    /**
2285
     * Check if the number of Unicode characters isn't greater than the specified integer.
2286
     *
2287
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true</code>
2288
     *
2289
     * @param string $str      the original string to be checked
2290
     * @param int    $box_size the size in number of chars to be checked against string
2291
     *
2292
     * @psalm-pure
2293
     *
2294
     * @return bool
2295
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2296
     */
2297 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // the length is cast to int before it is compared with the box size
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2301
2302
    /**
2303
     * Try to fix simple broken UTF-8 strings.
2304
     *
2305
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2306
     *
2307
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
2308
     *
2309
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2310
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2311
     * See: http://en.wikipedia.org/wiki/Windows-1252
2312
     *
2313
     * @param string $str <p>The input string</p>
2314
     *
2315
     * @psalm-pure
2316
     *
2317
     * @return string
2318
     */
2319 47
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Function-level cache of the search strings (the keys of the "utf8_fix" table).
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * Function-level cache of the replacement table itself.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            // load the table into the class-level property on first use
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
            $SEARCH_CACHE = \array_keys($REPLACE_CACHE);
        }

        \assert(\is_array($REPLACE_CACHE));

        // "str_replace()" pairs both arrays by position, so keys and values line up
        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2352
2353
    /**
2354
     * Fix a double (or multiple) encoded UTF8 string.
2355
     *
2356
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
2357
     *
2358
     * @param string|string[] $str you can use a string or an array of strings
2359
     *
2360
     * @psalm-pure
2361
     *
2362
     * @return string|string[]
2363
     *                         Will return the fixed input-"array" or
2364
     *                         the fixed input-"string"
2365
     *
2366
     * @psalm-suppress InvalidReturnType
2367
     */
2368 2
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // fix every element recursively and write it back under its original key
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // decode + re-encode until another round no longer changes the string
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2399
2400
    /**
2401
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2402
     *
2403
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2404
     *
2405
     * @param string $char
2406
     *
2407
     * @psalm-pure
2408
     *
2409
     * @return string
2410
     *                <p>'RTL' or 'LTR'.</p>
2411
     */
2412 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $direction_code = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            if (\in_array($direction_code, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($direction_code, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // any other code falls through to the table based lookup below
        }

        $code_point = static::chr_to_decimal($char);

        // everything outside of this window is reported as left-to-right
        if (!($code_point >= 0x5be && $code_point <= 0x10b7f)) {
            return 'LTR';
        }

        // single code points that are reported as right-to-left (strict comparison)
        $rtl_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($code_point, $rtl_code_points, true)) {
            return 'RTL';
        }

        // inclusive [first, last] ranges that are reported as right-to-left
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($code_point >= $range[0] && $code_point <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2518
2519
    /**
2520
     * Check for php-support.
2521
     *
2522
     * @param string|null $key
2523
     *
2524
     * @psalm-pure
2525
     *
2526
     * @return mixed
2527
     *               Return the full support-"array", if $key === null<br>
2528
     *               return bool-value, if $key is used and available<br>
2529
     *               otherwise return <strong>null</strong>
2530
     */
2531 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // load the transliterator list on first use
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // unknown keys yield null
            return self::$SUPPORT[$key] ?? null;
        }

        // without a key the whole support-"array" is returned
        return self::$SUPPORT;
    }
2545
2546
    /**
2547
     * Warning: this method only works for some file-types (png, jpg)
2548
     *          if you need more supported types, please use e.g. "finfo"
2549
     *
2550
     * @param string $str
2551
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
2552
     *
2553
     * @psalm-pure
2554
     *
2555
     * @return null[]|string[]
2556
     *                         <p>with these keys: 'ext', 'mime', 'type'</p>
2557
     *
2558
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
2559
     */
2560 40
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // only the first two bytes are inspected
        /** @var false|string $magic_bytes - needed for PhpStan (stubs error) */
        $magic_bytes = \substr($str, 0, 2);
        if ($magic_bytes === false || \strlen($magic_bytes) !== 2) {
            return $fallback;
        }

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // "%P" => 'pdf', "MZ" => 'exe', "MT" => 'midi', "PK" => 'zip',
        // "Ra" => 'rar', "GI" => 'gif', "II" => 'tiff', "BM" => 'bmp', ...
        //
        $known_signatures = [
            // bytes 255 + 216
            "\xFF\xD8" => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            // bytes 137 + 80
            "\x89\x50" => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        // every other two-byte prefix yields the fallback
        return $known_signatures[$magic_bytes] ?? $fallback;
    }
2627
2628
    /**
2629
     * @param int    $length         <p>Length of the random string.</p>
2630
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2631
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2632
     *
2633
     * @return string
2634
     */
2635 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // "UTF-8" uses the native mbstring calls, every other encoding
        // is normalized first and goes through the "UTF8::*" wrappers
        $use_native_mb = $encoding === 'UTF-8';
        if (!$use_native_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $pool_size = $use_native_mb
            ? (int) \mb_strlen($possible_chars)
            : (int) self::strlen($possible_chars, $encoding);

        // nothing to pick from
        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $offset = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $offset = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_native_mb
                ? \mb_substr($possible_chars, $offset, 1)
                : self::substr($possible_chars, $offset, 1, $encoding);

            // a failed extraction is retried and does not count towards the length
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2692
2693
    /**
2694
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
2695
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2696
     *
2697
     * @return string
2698
     */
2699 1
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $seed = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $seed = \mt_rand(0, \mt_getrandmax());
        }

        $remote_addr = $_SERVER['REMOTE_ADDR'] ?? '';
        $server_addr = $_SERVER['SERVER_ADDR'] ?? '';

        // random number + session id + request addresses + caller supplied entropy
        $entropy = $seed . \session_id() . $remote_addr . $server_addr . $extra_entropy;

        $unique = \uniqid($entropy, true);

        // optionally collapse the identifier into a md5-hash
        return $use_md5 ? \md5($unique . $entropy) : $unique;
    }
2722
2723
    /**
2724
     * alias for "UTF8::string_has_bom()"
2725
     *
2726
     * @param string $str
2727
     *
2728
     * @psalm-pure
2729
     *
2730
     * @return bool
2731
     *
2732
     * @see        UTF8::string_has_bom()
2733
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
2734
     */
2735 2
    public static function hasBom(string $str): bool
    {
        // deprecated alias: the check is done by "UTF8::string_has_bom()"
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2739
2740
    /**
2741
     * Returns true if the string contains a lower case char, false otherwise.
2742
     *
2743
     * @param string $str <p>The input string.</p>
2744
     *
2745
     * @psalm-pure
2746
     *
2747
     * @return bool
2748
     *              <p>Whether or not the string contains a lower case character.</p>
2749
     */
2750 47
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without mbstring the check is delegated to "UTF8::str_matches_pattern()"
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2759
2760
    /**
2761
     * Returns true if the string contains whitespace, false otherwise.
2762
     *
2763
     * @param string $str <p>The input string.</p>
2764
     *
2765
     * @psalm-pure
2766
     *
2767
     * @return bool
2768
     *              <p>Whether or not the string contains whitespace.</p>
2769
     */
2770 11
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // without mbstring the check is delegated to "UTF8::str_matches_pattern()"
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2779
2780
    /**
2781
     * Returns true if the string contains an upper case char, false otherwise.
2782
     *
2783
     * @param string $str <p>The input string.</p>
2784
     *
2785
     * @psalm-pure
2786
     *
2787
     * @return bool
2788
     *              <p>Whether or not the string contains an upper case character.</p>
2789
     */
2790 12
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without mbstring the check is delegated to "UTF8::str_matches_pattern()"
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2799
2800
    /**
2801
     * Converts a hexadecimal value into a UTF-8 character.
2802
     *
2803
     * INFO: opposite to UTF8::chr_to_hex()
2804
     *
2805
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
2806
     *
2807
     * @param string $hexdec <p>The hexadecimal value.</p>
2808
     *
2809
     * @psalm-pure
2810
     *
2811
     * @return false|string one single UTF-8 character
2812
     */
2813 4
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        // the integer code point is converted by "UTF8::decimal_to_chr()"
        return self::decimal_to_chr($code_point);
    }
2818
2819
    /**
2820
     * Converts hexadecimal U+xxxx code point representation to integer.
2821
     *
2822
     * INFO: opposite to UTF8::int_to_hex()
2823
     *
2824
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
2825
     *
2826
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2827
     *
2828
     * @psalm-pure
2829
     *
2830
     * @return false|int
2831
     *                   <p>The code point, or false on failure.</p>
2832
     */
2833 2
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Accepted forms: an optional "\u" or "U+" prefix followed by 4-6 hexadecimal digits.
        //
        // The digit class is limited to [0-9a-fA-F]: letters like "g"-"z" are not hexadecimal
        // and must result in "false" instead of being silently truncated by "intval()".
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2848
2849
    /**
2850
     * alias for "UTF8::html_entity_decode()"
2851
     *
2852
     * @param string   $str
2853
     * @param int|null $flags
2854
     * @param string   $encoding
2855
     *
2856
     * @psalm-pure
2857
     *
2858
     * @return string
2859
     *
2860
     * @see        UTF8::html_entity_decode()
2861
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
2862
     */
2863 2
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // deprecated alias: all arguments are forwarded unchanged
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2870
2871
    /**
2872
     * Converts a UTF-8 string to a series of HTML numbered entities.
2873
     *
2874
     * INFO: opposite to UTF8::html_decode()
2875
     *
2876
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
2877
     *
2878
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2879
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2880
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2881
     *
2882
     * @psalm-pure
2883
     *
2884
     * @return string HTML numbered entities
2885
     */
2886 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "$keep_ascii_chars" the conversion range starts after the ASCII block
            $convmap = [$keep_ascii_chars ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // first attempt: no explicit encoding argument
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $convmap);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            // second attempt (or the only one for non "UTF-8"): explicit encoding argument
            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_single_char = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        $encoded_chars = \array_map($encode_single_char, self::str_split($str));

        return \implode('', $encoded_chars);
    }
2943
2944
    /**
2945
     * UTF-8 version of html_entity_decode()
2946
     *
2947
     * The reason we are not using html_entity_decode() by itself is because
2948
     * while it is not technically correct to leave out the semicolon
2949
     * at the end of an entity most browsers will still interpret the entity
2950
     * correctly. html_entity_decode() does not convert entities without
2951
     * semicolons, so we are left with our own little solution here. Bummer.
2952
     *
2953
     * Convert all HTML entities to their applicable characters.
2954
     *
2955
     * INFO: opposite to UTF8::html_encode()
2956
     *
2957
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2958
     *
2959
     * @see http://php.net/manual/en/function.html-entity-decode.php
2960
     *
2961
     * @param string   $str      <p>
2962
     *                           The input string.
2963
     *                           </p>
2964
     * @param int|null $flags    [optional] <p>
2965
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2966
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2967
     *                           <table>
2968
     *                           Available <i>flags</i> constants
2969
     *                           <tr valign="top">
2970
     *                           <td>Constant Name</td>
2971
     *                           <td>Description</td>
2972
     *                           </tr>
2973
     *                           <tr valign="top">
2974
     *                           <td><b>ENT_COMPAT</b></td>
2975
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2976
     *                           </tr>
2977
     *                           <tr valign="top">
2978
     *                           <td><b>ENT_QUOTES</b></td>
2979
     *                           <td>Will convert both double and single quotes.</td>
2980
     *                           </tr>
2981
     *                           <tr valign="top">
2982
     *                           <td><b>ENT_NOQUOTES</b></td>
2983
     *                           <td>Will leave both double and single quotes unconverted.</td>
2984
     *                           </tr>
2985
     *                           <tr valign="top">
2986
     *                           <td><b>ENT_HTML401</b></td>
2987
     *                           <td>
2988
     *                           Handle code as HTML 4.01.
2989
     *                           </td>
2990
     *                           </tr>
2991
     *                           <tr valign="top">
2992
     *                           <td><b>ENT_XML1</b></td>
2993
     *                           <td>
2994
     *                           Handle code as XML 1.
2995
     *                           </td>
2996
     *                           </tr>
2997
     *                           <tr valign="top">
2998
     *                           <td><b>ENT_XHTML</b></td>
2999
     *                           <td>
3000
     *                           Handle code as XHTML.
3001
     *                           </td>
3002
     *                           </tr>
3003
     *                           <tr valign="top">
3004
     *                           <td><b>ENT_HTML5</b></td>
3005
     *                           <td>
3006
     *                           Handle code as HTML 5.
3007
     *                           </td>
3008
     *                           </tr>
3009
     *                           </table>
3010
     *                           </p>
3011
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3012
     *
3013
     * @psalm-pure
3014
     *
3015
     * @return string the decoded string
3016
     */
3017 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // strings with less than 4 bytes are returned untouched
        if (!isset($str[3])) {
            return $str;
        }

        // no "&"
        if (\strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // keep decoding until a pass leaves the string unchanged or no "&" is left
        while (\strpos($str, '&') !== false) {
            $previous = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // numeric entities without a trailing ";" get one appended
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode(
                $str,
                $flags,
                $encoding
            );

            if ($previous === $str) {
                break;
            }
        }

        return $str;
    }
3077
3078
    /**
3079
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
3080
     *
3081
     * @param string $str
3082
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3083
     *
3084
     * @psalm-pure
3085
     *
3086
     * @return string
3087
     */
3088 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Convert both quote styles and substitute invalid code unit sequences.
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3096
3097
    /**
     * Remove empty html-tags: an opening tag that is followed (optionally separated
     * by whitespace only) by a closing tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * INFO: The name of the closing tag is not compared with the name of the opening tag.
     *
     * If the regex engine fails (e.g. because the input is not valid UTF-8, which the
     * "u" modifier requires) the input is returned unchanged instead of an empty string.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $result = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on failure; never throw away the caller's data in that case.
        return $result ?? $str;
    }
3116
3117
    /**
3118
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3119
     *
3120
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3121
     *
3122
     * @see http://php.net/manual/en/function.htmlentities.php
3123
     *
3124
     * @param string $str           <p>
3125
     *                              The input string.
3126
     *                              </p>
3127
     * @param int    $flags         [optional] <p>
3128
     *                              A bitmask of one or more of the following flags, which specify how to handle
3129
     *                              quotes, invalid code unit sequences and the used document type. The default is
3130
     *                              ENT_COMPAT | ENT_HTML401.
3131
     *                              <table>
3132
     *                              Available <i>flags</i> constants
3133
     *                              <tr valign="top">
3134
     *                              <td>Constant Name</td>
3135
     *                              <td>Description</td>
3136
     *                              </tr>
3137
     *                              <tr valign="top">
3138
     *                              <td><b>ENT_COMPAT</b></td>
3139
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3140
     *                              </tr>
3141
     *                              <tr valign="top">
3142
     *                              <td><b>ENT_QUOTES</b></td>
3143
     *                              <td>Will convert both double and single quotes.</td>
3144
     *                              </tr>
3145
     *                              <tr valign="top">
3146
     *                              <td><b>ENT_NOQUOTES</b></td>
3147
     *                              <td>Will leave both double and single quotes unconverted.</td>
3148
     *                              </tr>
3149
     *                              <tr valign="top">
3150
     *                              <td><b>ENT_IGNORE</b></td>
3151
     *                              <td>
3152
     *                              Silently discard invalid code unit sequences instead of returning
3153
     *                              an empty string. Using this flag is discouraged as it
3154
     *                              may have security implications.
3155
     *                              </td>
3156
     *                              </tr>
3157
     *                              <tr valign="top">
3158
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3159
     *                              <td>
3160
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3161
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3162
     *                              string.
3163
     *                              </td>
3164
     *                              </tr>
3165
     *                              <tr valign="top">
3166
     *                              <td><b>ENT_DISALLOWED</b></td>
3167
     *                              <td>
3168
     *                              Replace invalid code points for the given document type with a
3169
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3170
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3171
     *                              instance, to ensure the well-formedness of XML documents with
3172
     *                              embedded external content.
3173
     *                              </td>
3174
     *                              </tr>
3175
     *                              <tr valign="top">
3176
     *                              <td><b>ENT_HTML401</b></td>
3177
     *                              <td>
3178
     *                              Handle code as HTML 4.01.
3179
     *                              </td>
3180
     *                              </tr>
3181
     *                              <tr valign="top">
3182
     *                              <td><b>ENT_XML1</b></td>
3183
     *                              <td>
3184
     *                              Handle code as XML 1.
3185
     *                              </td>
3186
     *                              </tr>
3187
     *                              <tr valign="top">
3188
     *                              <td><b>ENT_XHTML</b></td>
3189
     *                              <td>
3190
     *                              Handle code as XHTML.
3191
     *                              </td>
3192
     *                              </tr>
3193
     *                              <tr valign="top">
3194
     *                              <td><b>ENT_HTML5</b></td>
3195
     *                              <td>
3196
     *                              Handle code as HTML 5.
3197
     *                              </td>
3198
     *                              </tr>
3199
     *                              </table>
3200
     *                              </p>
3201
     * @param string $encoding      [optional] <p>
3202
     *                              Like <b>htmlspecialchars</b>,
3203
     *                              <b>htmlentities</b> takes an optional third argument
3204
     *                              <i>encoding</i> which defines encoding used in
3205
     *                              conversion.
3206
     *                              Although this argument is technically optional, you are highly
3207
     *                              encouraged to specify the correct value for your code.
3208
     *                              </p>
3209
     * @param bool   $double_encode [optional] <p>
3210
     *                              When <i>double_encode</i> is turned off PHP will not
3211
     *                              encode existing html entities. The default is to convert everything.
3212
     *                              </p>
3213
     *
3214
     * @psalm-pure
3215
     *
3216
     * @return string
3217
     *                <p>
3218
     *                The encoded string.
3219
     *                <br><br>
3220
     *                If the input <i>string</i> contains an invalid code unit
3221
     *                sequence within the given <i>encoding</i> an empty string
3222
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3223
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3224
     *                </p>
3225
     */
3226 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * The native htmlentities() does not convert a backslash into its html entity,
         * so every backslash is replaced by its numeric entity afterwards.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        return self::html_encode($encoded, true, $encoding);
    }
3255
3256
    /**
3257
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3258
     *
3259
     * INFO: Take a look at "UTF8::htmlentities()"
3260
     *
3261
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3262
     *
3263
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3264
     *
3265
     * @param string $str           <p>
3266
     *                              The string being converted.
3267
     *                              </p>
3268
     * @param int    $flags         [optional] <p>
3269
     *                              A bitmask of one or more of the following flags, which specify how to handle
3270
     *                              quotes, invalid code unit sequences and the used document type. The default is
3271
     *                              ENT_COMPAT | ENT_HTML401.
3272
     *                              <table>
3273
     *                              Available <i>flags</i> constants
3274
     *                              <tr valign="top">
3275
     *                              <td>Constant Name</td>
3276
     *                              <td>Description</td>
3277
     *                              </tr>
3278
     *                              <tr valign="top">
3279
     *                              <td><b>ENT_COMPAT</b></td>
3280
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3281
     *                              </tr>
3282
     *                              <tr valign="top">
3283
     *                              <td><b>ENT_QUOTES</b></td>
3284
     *                              <td>Will convert both double and single quotes.</td>
3285
     *                              </tr>
3286
     *                              <tr valign="top">
3287
     *                              <td><b>ENT_NOQUOTES</b></td>
3288
     *                              <td>Will leave both double and single quotes unconverted.</td>
3289
     *                              </tr>
3290
     *                              <tr valign="top">
3291
     *                              <td><b>ENT_IGNORE</b></td>
3292
     *                              <td>
3293
     *                              Silently discard invalid code unit sequences instead of returning
3294
     *                              an empty string. Using this flag is discouraged as it
3295
     *                              may have security implications.
3296
     *                              </td>
3297
     *                              </tr>
3298
     *                              <tr valign="top">
3299
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3300
     *                              <td>
3301
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3302
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3303
     *                              string.
3304
     *                              </td>
3305
     *                              </tr>
3306
     *                              <tr valign="top">
3307
     *                              <td><b>ENT_DISALLOWED</b></td>
3308
     *                              <td>
3309
     *                              Replace invalid code points for the given document type with a
3310
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3311
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3312
     *                              instance, to ensure the well-formedness of XML documents with
3313
     *                              embedded external content.
3314
     *                              </td>
3315
     *                              </tr>
3316
     *                              <tr valign="top">
3317
     *                              <td><b>ENT_HTML401</b></td>
3318
     *                              <td>
3319
     *                              Handle code as HTML 4.01.
3320
     *                              </td>
3321
     *                              </tr>
3322
     *                              <tr valign="top">
3323
     *                              <td><b>ENT_XML1</b></td>
3324
     *                              <td>
3325
     *                              Handle code as XML 1.
3326
     *                              </td>
3327
     *                              </tr>
3328
     *                              <tr valign="top">
3329
     *                              <td><b>ENT_XHTML</b></td>
3330
     *                              <td>
3331
     *                              Handle code as XHTML.
3332
     *                              </td>
3333
     *                              </tr>
3334
     *                              <tr valign="top">
3335
     *                              <td><b>ENT_HTML5</b></td>
3336
     *                              <td>
3337
     *                              Handle code as HTML 5.
3338
     *                              </td>
3339
     *                              </tr>
3340
     *                              </table>
3341
     *                              </p>
3342
     * @param string $encoding      [optional] <p>
3343
     *                              Defines encoding used in conversion.
3344
     *                              </p>
3345
     *                              <p>
3346
     *                              For the purposes of this function, the encodings
3347
     *                              ISO-8859-1, ISO-8859-15,
3348
     *                              UTF-8, cp866,
3349
     *                              cp1251, cp1252, and
3350
     *                              KOI8-R are effectively equivalent, provided the
3351
     *                              <i>string</i> itself is valid for the encoding, as
3352
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3353
     *                              the same positions in all of these encodings.
3354
     *                              </p>
3355
     * @param bool   $double_encode [optional] <p>
3356
     *                              When <i>double_encode</i> is turned off PHP will not
3357
     *                              encode existing html entities, the default is to convert everything.
3358
     *                              </p>
3359
     *
3360
     * @psalm-pure
3361
     *
3362
     * @return string the converted string.
3363
     *                </p>
3364
     *                <p>
3365
     *                If the input <i>string</i> contains an invalid code unit
3366
     *                sequence within the given <i>encoding</i> an empty string
3367
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3368
     *                <b>ENT_SUBSTITUTE</b> flags are set
3369
     */
3370 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as-is, everything else gets normalized first.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $escaped = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $escaped;
    }
3387
3388
    /**
     * Checks whether the "iconv" extension is loaded on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3402
3403
    /**
3404
     * alias for "UTF8::decimal_to_chr()"
3405
     *
3406
     * @param int|string $int
3407
     *
3408
     * @phpstan-param int|numeric-string $int
3409
     *
3410
     * @psalm-pure
3411
     *
3412
     * @return string
3413
     *
3414
     * @see        UTF8::decimal_to_chr()
3415
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
3416
     */
3417 4
    public static function int_to_chr($int): string
    {
        // deprecated alias: forward to the replacement method
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3421
3422
    /**
     * Converts an integer to its hexadecimal U+xxxx code point representation.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>The string that is put in front of the hex digits.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The prefix followed by the hex digits, zero-padded on the left to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // pad short values to four digits, longer values are kept as they are
        return $prefix . \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);
    }
3444
3445
    /**
     * Checks whether the "IntlChar" class is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intlChar_loaded(): bool
    {
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3459
3460
    /**
     * Checks whether the "intl" extension is loaded on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3474
3475
    /**
3476
     * alias for "UTF8::is_ascii()"
3477
     *
3478
     * @param string $str
3479
     *
3480
     * @psalm-pure
3481
     *
3482
     * @return bool
3483
     *
3484
     * @see        UTF8::is_ascii()
3485
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
3486
     */
3487 2
    public static function isAscii(string $str): bool
    {
        // deprecated alias: go through UTF8::is_ascii(), which forwards to ASCII::is_ascii()
        return self::is_ascii($str);
    }
3491
3492
    /**
3493
     * alias for "UTF8::is_base64()"
3494
     *
3495
     * @param string $str
3496
     *
3497
     * @psalm-pure
3498
     *
3499
     * @return bool
3500
     *
3501
     * @see        UTF8::is_base64()
3502
     * @deprecated <p>please use "UTF8::is_base64()"</p>
3503
     */
3504 2
    public static function isBase64($str): bool
    {
        // deprecated alias: forward to the snake_case method
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3508
3509
    /**
3510
     * alias for "UTF8::is_binary()"
3511
     *
3512
     * @param int|string $str
3513
     * @param bool       $strict
3514
     *
3515
     * @psalm-pure
3516
     *
3517
     * @return bool
3518
     *
3519
     * @see        UTF8::is_binary()
3520
     * @deprecated <p>please use "UTF8::is_binary()"</p>
3521
     */
3522 4
    public static function isBinary($str, bool $strict = false): bool
    {
        // deprecated alias: forward to the snake_case method
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3526
3527
    /**
3528
     * alias for "UTF8::is_bom()"
3529
     *
3530
     * @param string $utf8_chr
3531
     *
3532
     * @psalm-pure
3533
     *
3534
     * @return bool
3535
     *
3536
     * @see        UTF8::is_bom()
3537
     * @deprecated <p>please use "UTF8::is_bom()"</p>
3538
     */
3539 2
    public static function isBom(string $utf8_chr): bool
    {
        // deprecated alias: forward to the snake_case method
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3543
3544
    /**
3545
     * alias for "UTF8::is_html()"
3546
     *
3547
     * @param string $str
3548
     *
3549
     * @psalm-pure
3550
     *
3551
     * @return bool
3552
     *
3553
     * @see        UTF8::is_html()
3554
     * @deprecated <p>please use "UTF8::is_html()"</p>
3555
     */
3556 2
    public static function isHtml(string $str): bool
    {
        // deprecated alias: forward to the snake_case method
        $is_html = self::is_html($str);

        return $is_html;
    }
3560
3561
    /**
3562
     * alias for "UTF8::is_json()"
3563
     *
3564
     * @param string $str
3565
     *
3566
     * @return bool
3567
     *
3568
     * @see        UTF8::is_json()
3569
     * @deprecated <p>please use "UTF8::is_json()"</p>
3570
     */
3571 1
    public static function isJson(string $str): bool
    {
        // deprecated alias: forward to the snake_case method
        $is_json = self::is_json($str);

        return $is_json;
    }
3575
3576
    /**
     * alias for "UTF8::is_utf16()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it isn't UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see        UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        // deprecated alias: the result of the snake_case method is passed through unchanged
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }
3595
3596
    /**
     * alias for "UTF8::is_utf32()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it isn't UTF-32,
     *                   <strong>1</strong> for UTF-32LE,
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see        UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        // deprecated alias: the result of the snake_case method is passed through unchanged
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }
3615
3616
    /**
3617
     * alias for "UTF8::is_utf8()"
3618
     *
3619
     * @param string $str
3620
     * @param bool   $strict
3621
     *
3622
     * @psalm-pure
3623
     *
3624
     * @return bool
3625
     *
3626
     * @see        UTF8::is_utf8()
3627
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
3628
     */
3629 17
    public static function isUtf8($str, bool $strict = false): bool
    {
        // deprecated alias: forward to the snake_case method
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3633
3634
    /**
3635
     * Returns true if the string contains only alphabetic chars, false otherwise.
3636
     *
3637
     * @param string $str <p>The input string.</p>
3638
     *
3639
     * @psalm-pure
3640
     *
3641
     * @return bool
3642
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3643
     */
3644 10
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // without mbstring, use the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3653
3654
    /**
3655
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3656
     *
3657
     * @param string $str <p>The input string.</p>
3658
     *
3659
     * @psalm-pure
3660
     *
3661
     * @return bool
3662
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3663
     */
3664 13
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // without mbstring, use the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3673
3674
    /**
3675
     * Returns true if the string contains only punctuation chars, false otherwise.
3676
     *
3677
     * @param string $str <p>The input string.</p>
3678
     *
3679
     * @psalm-pure
3680
     *
3681
     * @return bool
3682
     *              <p>Whether or not $str contains only punctuation chars.</p>
3683
     */
3684 10
    public static function is_punctuation(string $str): bool
    {
        // the whole string (which may be empty) must consist of [[:punct:]] chars
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3688
3689
    /**
3690
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3691
     *
3692
     * @param string $str                       <p>The input string.</p>
3693
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3694
     *
3695
     * @psalm-pure
3696
     *
3697
     * @return bool
3698
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3699
     */
3700 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // the string counts as printable when removing invisible characters does not change it
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $cleaned === $str;
    }
3704
3705
    /**
3706
     * Checks if a string is 7 bit ASCII.
3707
     *
3708
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3709
     *
3710
     * @param string $str <p>The string to check.</p>
3711
     *
3712
     * @psalm-pure
3713
     *
3714
     * @return bool
3715
     *              <p>
3716
     *              <strong>true</strong> if it is ASCII<br>
3717
     *              <strong>false</strong> otherwise
3718
     *              </p>
3719
     */
3720 8
    public static function is_ascii(string $str): bool
    {
        // the actual check lives in the ASCII helper class
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3724
3725
    /**
3726
     * Returns true if the string is base64 encoded, false otherwise.
3727
     *
3728
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3729
     *
3730
     * @param string|null $str                   <p>The input string.</p>
3731
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3732
     *
3733
     * @psalm-pure
3734
     *
3735
     * @return bool
3736
     *              <p>Whether or not $str is base64 encoded.</p>
3737
     */
3738 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // an empty string is only accepted when the caller explicitly allows it
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // e.g. null can never be base64
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round-trip check: re-encoding must reproduce the input exactly
        return \base64_encode($decoded) === $str;
    }
3756
3757
    /**
     * Check if the input is binary (heuristic check, it looks like a hack).
     *
     * The checks run in this order, the first match returns true:
     * the input consists only of "0" and "1" chars, "UTF8::get_file_type()" reports
     * the type "binary", more than 25% of the bytes are NUL bytes, or (only in strict mode)
     * "finfo" detects the encoding "binary". An empty input is never binary.
     *
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * @param int|string $input
     * @param bool       $strict [optional] <p>Also ask "finfo"; needs ext-fileinfo.</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if $strict is used but ext-fileinfo is not available
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // only "0" and "1" chars
        if (\preg_match('~^[01]+$~', $input) === 1) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // ratio of NUL bytes compared to the byte length of the input
        $byte_length = \strlen($input);
        $null_bytes = \substr_count($input, "\x0");
        if ($null_bytes / $byte_length > 0.25) {
            return true;
        }

        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detected_encoding === 'binary';
    }
3808
3809
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and passed to "UTF8::is_binary()" in strict mode.
     * A file that cannot be opened or that is empty is reported as not binary.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $first_bytes = \fread($handle, 512);
        \fclose($handle);

        // nothing could be read
        if ($first_bytes === '' || $first_bytes === false) {
            return false;
        }

        return self::is_binary($first_bytes, true);
    }
3835
3836
    /**
     * Check if the string consists of whitespace characters only.
     *
     * An empty string is blank as well, because the pattern allows zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // fallback if mbstring isn't flagged as available
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3855
3856
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if $str is exactly one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // The BOMs are the keys of the map. A strict comparison against the keys is used
        // (instead of iterating the shared static array by reference), so the static
        // array is only read and no reference is left behind.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3881
3882
    /**
     * Determine whether the given value is considered to be empty.
     *
     * The value is empty if it is loosely equal to FALSE,
     * e.g.: '', '0', 0, 0.0, [] or null.
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // the parameter always exists, so empty() boils down to a negated bool-cast
        return !$str;
    }
3899
3900
    /**
     * Check if the string consists of hexadecimal characters only.
     *
     * An empty string passes the check as well, because the pattern allows zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // fallback if mbstring isn't flagged as available
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3919
3920
    /**
     * Check if the string contains at least one HTML tag.
     *
     * Opening, closing and self-closing tags (with or without attributes) are detected
     * via a regular expression, the markup is not parsed or validated.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded_str = self::emoji_encode($str);

        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        // "1" means that a tag was found, "0" (no match) and "false" (regex error) are both a "no"
        return \preg_match($tag_pattern, $encoded_str) === 1;
    }
3947
3948
    /**
     * Check if $url is a correct url.
     *
     * Only "http://" and "https://" urls are accepted. The url is valid if it either matches
     * the internal pattern for (internationalized) domain names or passes FILTER_VALIDATE_URL.
     *
     * NOTE: the internal domain pattern is anchored at the start of the url only.
     *
     * @param string $url                <p>The url to check.</p>
     * @param bool   $disallow_localhost [optional] <p>Reject urls that point to the local machine
     *                                   ("localhost", "*.localhost", "127.0.0.1", "::1" and "[::1]").</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // IPv6 literals must be enclosed in brackets inside of a url,
                    // so this is the form that a real loopback url will have
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
4003
4004
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * The string is decoded via "UTF8::json_decode()" and the decoder status is checked afterwards.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if ext-json is flagged as unavailable
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // "null" is the result for undecodable input and also for the JSON literal "null",
        // so only the literal (in any case) is allowed to continue here
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4044
4045
    /**
     * Check if the string consists of lowercase characters only.
     *
     * An empty string passes the check as well, because the pattern allows zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // fallback if mbstring isn't flagged as available
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4062
4063
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done by trying to unserialize the string. Class instantiation is disabled
     * for this attempt, so serialized objects are still detected, but no code of the
     * serialized classes (e.g. "__wakeup()" or "__destruct()") can be triggered by the check.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        // "b:0;" is the serialized "false", which can't be distinguished from an unserialize() failure
        return $str === 'b:0;'
               ||
               @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4085
4086
    /**
     * Check if the string consists of uppercase characters only.
     *
     * An empty string passes the check as well, because the pattern allows zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only uppercase characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // fallback if mbstring isn't flagged as available
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4106
4107
    /**
     * Check if the string is UTF-16.
     *
     * For both byte orders the string is converted to UTF-8 and back again. If that round-trip
     * is stable, the converted chars that also show up in the char list of the input are
     * counted. The byte order with the higher count wins, a tie means "not UTF-16".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>Return false right away if the input does not pass
     *                                          the strict "UTF8::is_binary()" check.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per byte order
        $maybe = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // the conversion must survive a round-trip, otherwise the byte order is no candidate
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // lazy: the char list of the input is only needed if there is a candidate
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys (the chars) are of interest, so there is no need for a reference here
            foreach (self::count_chars($test3) as $test3char => $test3char_count) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$maybe[$encoding];
                }
            }
        }

        if ($maybe['UTF-16BE'] === $maybe['UTF-16LE']) {
            return false;
        }

        return $maybe['UTF-16LE'] > $maybe['UTF-16BE'] ? 1 : 2;
    }
4200
4201
    /**
     * Check if the string is UTF-32.
     *
     * For both byte orders the string is converted to UTF-8 and back again. If that round-trip
     * is stable, the converted chars that also show up in the char list of the input are
     * counted. The byte order with the higher count wins, a tie means "not UTF-32".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>Return false right away if the input does not pass
     *                                          the strict "UTF8::is_binary()" check.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per byte order
        $maybe = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // the conversion must survive a round-trip, otherwise the byte order is no candidate
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // lazy: the char list of the input is only needed if there is a candidate
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys (the chars) are of interest, so there is no need for a reference here
            foreach (self::count_chars($test3) as $test3char => $test3char_count) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$maybe[$encoding];
                }
            }
        }

        if ($maybe['UTF-32BE'] === $maybe['UTF-32LE']) {
            return false;
        }

        return $maybe['UTF-32LE'] > $maybe['UTF-32BE'] ? 1 : 2;
    }
4294
4295
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * An array is checked element by element (recursively), it is only valid if every
     * element is valid. An empty array is valid.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // the values are only read, so they are iterated by value (no dangling reference)
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4325
4326
    /**
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" before it is handed over to the
     * native "json_decode()" function.
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>
     *                        (default is to cast large integers as floats).
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if ext-json is flagged as unavailable
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // normalize the input first (this happens before the extension check, like ever before)
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filtered_json, $assoc, $depth, $options);

        return $decoded;
    }
4380
4381
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" before it is handed over to the
     * native "json_encode()" function.
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if ext-json is flagged as unavailable
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // normalize the input first (this happens before the extension check, like ever before)
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filtered_value, $options, $depth);

        return $encoded;
    }
4434
4435
    /**
     * Checks whether JSON is available on the server.
     *
     * The check only looks for the existence of the "json_decode()" function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4449
4450
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without a language and without the "keep the string length" option the native
     * "mb_strtolower()" is used for the first char, in any other case "UTF8::strtolower()" is used.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // other encodings: normalize the encoding and use the methods from this class
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $first_char = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $rest;
        }

        $rest = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // fast-path: no special cases requested, so the native function is enough
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $rest;
        }

        return self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $rest;
    }
4512
4513
    /**
     * Deprecated alias for "UTF8::lcfirst()", all arguments are passed through unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4544
4545
    /**
     * Lowercase the first character of every word in the string.
     *
     * The input is split via "UTF8::str_to_words()" and every non-empty chunk is
     * passed to "UTF8::lcfirst()", unless it is listed in $exceptions. The processed
     * chunks are concatenated again in their original order.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Words that are kept untouched (strict,
     *                                                   case-sensitive comparison).</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with lower-cased word beginnings, or an empty string for empty input.</p>
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare with '' explicitly: a truthiness check would also discard the string "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            $word = (string) $word;

            // Only skip really empty chunks; "0" is a legitimate word and must survive.
            if ($word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                // Excluded words are appended exactly as they were found.
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4598
4599
    /**
     * Deprecated alias: forwards every argument unchanged to "UTF8::lcfirst()".
     *
     * @param string      $str                           <p>Passed through to lcfirst().</p>
     * @param string      $encoding                      [optional] <p>Passed through to lcfirst().</p>
     * @param bool        $clean_utf8                    [optional] <p>Passed through to lcfirst().</p>
     * @param string|null $lang                          [optional] <p>Passed through to lcfirst().</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>Passed through to lcfirst().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever lcfirst() returns for the given arguments.</p>
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Kept only for backwards compatibility; lcfirst() does the actual work.
        return self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
4630
4631
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * With mbstring support the work is done by "mb_ereg_replace()", otherwise by
     * "UTF8::regex_replace()". In both cases the given characters are quoted via
     * "preg_quote()" and used as a character class anchored at the string start.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace ("\s")</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // mb_ereg patterns carry no delimiter, hence no delimiter argument here.
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2.
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            // regex_replace() wraps the pattern in "/" by default, so "/" must be quoted as well.
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4671
4672
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * An array argument is joined into one string first, then the code points are
     * collected via "UTF8::codepoints()" and the highest one is converted back via
     * "UTF8::chr()".
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input);

        // Nothing to compare: signal that with null instead of calling max() on an empty list.
        return $code_points === []
            ? null
            : self::chr((int) \max($code_points));
    }
4698
4699
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The largest value reported by "UTF8::chr_size_list()", or 0 if that list is empty.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4721
4722
    /**
     * Checks whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $extension_name = 'mbstring';

        return \extension_loaded($extension_name);
    }
4736
4737
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * An array argument is joined into one string first, then the code points are
     * collected via "UTF8::codepoints()" and the lowest one is converted back via
     * "UTF8::chr()".
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, or null if no code points were found.</p>
     */
    public static function min($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input);

        // Nothing to compare: signal that with null instead of calling min() on an empty list.
        return $code_points === []
            ? null
            : self::chr((int) \min($code_points));
    }
4764
4765
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::normalize_encoding()".
     *
     * @param mixed $encoding <p>Passed through to normalize_encoding().</p>
     * @param mixed $fallback <p>Passed through to normalize_encoding().</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>Whatever normalize_encoding() returns for the given arguments.</p>
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4782
4783
    /**
     * Normalize the encoding-"name" input.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * Resolution order: empty input -> fallback, a few hard-coded frequent names,
     * the per-request static cache, the known-encodings list ("encodings" data file)
     * and finally an alias table that is matched against the upper-cased name with all
     * non-alphanumeric characters removed. Unknown names are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // '' and '0' are the only falsy strings; both mean "no encoding given"
        if ($encoding === '' || $encoding === '0') {
            return $fallback;
        }

        //
        // fast paths for the most common names (strict comparison on purpose)
        //

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (\in_array($encoding, ['ISO', 'ISO-8859-1'], true)) {
            return 'ISO-8859-1';
        }

        // only a fallback, for non "strict_types" usage (e.g. a casted "true") ...
        if ($encoding === '1') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // alias lookup: the key is the upper-cased name reduced to [A-Z0-9]
        $cache_key = $encoding;
        $encoding = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // every alias target is a non-empty string, so isset() is sufficient here
        if (isset($aliases[$alias_key])) {
            $encoding = $aliases[$alias_key];
        }

        // cache under the name as it was passed in, so the next call is a plain lookup
        $STATIC_NORMALIZE_ENCODING_CACHE[$cache_key] = $encoding;

        return $encoding;
    }
4948
4949
    /**
     * Replace every line ending ("\r\n", "\r" and "\n") with the given replacer.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * A string replacer is applied in one single pass, so a replacer that itself
     * contains "\r" or "\n" (e.g. "\r\n") is never replaced a second time.
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. An array is handed to "str_replace()" as-is (one replacement per
     *                                  line-ending type, in the order "\r\n", "\r", "\n").</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        $line_endings = ["\r\n", "\r", "\n"];

        if (\is_array($replacer)) {
            // Array replacers keep the sequential str_replace() semantics callers may rely on.
            return \str_replace($line_endings, $replacer, $str);
        }

        $replacer = (string) $replacer;

        // strtr() with a map prefers the longest key ("\r\n" before "\r") and never re-scans
        // text it has already replaced, unlike sequential str_replace() calls.
        return \strtr($str, \array_fill_keys($line_endings, $replacer));
    }
4965
4966
    /**
     * Normalize some MS Word special characters.
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * This is a thin wrapper: the work is delegated to "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4982
4983
    /**
     * Normalize the whitespace.
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * This is a thin wrapper: all arguments are handed to "ASCII::normalize_whitespace()".
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
5012
5013
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * Lookup order: static per-request cache, the "ord" data table, "IntlChar::ord()"
     * (if available) and finally a manual decode of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        // the two literal names below skip the (more expensive) normalization
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // lazy-load the char => code point table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            // a falsy result (null / 0) is not cached here, the manual decode below takes over
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // unpack() yields a 1-based list of byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead_byte = $bytes ? $bytes[1] : 0;

        // The checks go from the longest to the shortest sequence; a lead byte that
        // announces more bytes than are present falls through to the next shorter form.
        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            /** @noinspection UnnecessaryCastingInspection */
            $code_point = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            /** @noinspection UnnecessaryCastingInspection */
            $code_point = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            /** @noinspection UnnecessaryCastingInspection */
            $code_point = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing at all): the byte value is the result
            $code_point = $lead_byte;
        }

        return $CHAR_CACHE[$cache_key] = $code_point;
    }
5107
5108
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $input = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        // success requires both: no failure from mbstring and at least one parsed entry
        return $parsed !== false && $result !== [];
    }
5149
5150
    /**
     * Checks if the "u" pattern modifier is available, which enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string
     * and reports whether that match succeeded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // errors are silenced on purpose: a failing match is the "not supported" answer
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
5166
5167
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * How the boundaries are interpreted:
     * - with $use_ctype: both decimal digit strings -> decimal code points; otherwise both
     *   hexadecimal digit strings -> hexadecimal code points; otherwise characters
     * - without $use_ctype: a numeric boundary is a code point, anything else is a character
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not a number or not positive
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is missing
     *
     * @return string[]
     *                  <p>The characters of the range; an empty array if a boundary is empty or resolves to 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // ctype_*() must always get strings: integers are interpreted as ASCII codes
        // (-128..255) and non-string arguments are deprecated as of PHP 8.1.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        // the end boundary follows the interpretation that was chosen for the start boundary
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5261
5262
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * One decode round is: to_utf8() -> html_entity_decode() -> rawurldecode() -> fix_simple_utf8().
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (repeat the round until the string stops
     *                             changing); otherwise exactly one round is done.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Without any of these markers there is nothing to url- or entity-decode.
        $needs_decoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needs_decoding = true;

                break;
            }
        }

        if (!$needs_decoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // The first round always runs; further rounds only in multi-decode mode and
        // only while the previous round still changed something.
        do {
            $str_before_round = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $str_before_round !== $str);

        return $str;
    }
5338
5339
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in the delimiter and always gets the "u" modifier,
     * followed by the given options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The result of "preg_replace()", cast to string (so an empty string if it fails).</p>
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // "msr" is mapped to "ms": the "r" is dropped before the options reach preg_replace()
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5374
5375
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::remove_bom()".
     *
     * @param string $str <p>Passed through to remove_bom().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever remove_bom() returns for the given string.</p>
     *
     * @see        UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5391
5392
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM (BOM bytes => byte length) is compared against the
     * start of the string; each matching entry is cut off before the next entry is checked.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);

        foreach (self::$BOM as $marker => $marker_size) {
            // byte-wise prefix comparison, nothing to do if the marker is not at the start
            if (\strncmp($str, $marker, $marker_size) !== 0) {
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $marker_size, $remaining_bytes);
            if ($stripped === false) {
                return '';
            }

            $remaining_bytes -= (int) $marker_size;
            $str = (string) $stripped;
        }

        return $str;
    }
5427
5428
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what is collapsed into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        // anything that is neither a string nor an array is ignored
        if (!\is_array($what)) {
            return $str;
        }

        foreach ($what as $item) {
            $item = (string) $item;

            // an empty needle can not have duplicates
            if ($item === '') {
                continue;
            }

            // A callback is used on purpose: handing $item to preg_replace() as the
            // replacement would interpret sequences like "$0" or "\2" as back-references.
            $str = (string) \preg_replace_callback(
                '/(?:' . \preg_quote($item, '/') . ')+/u',
                static function () use ($item): string {
                    return $item;
                },
                $str
            );
        }

        return $str;
    }
5458
5459
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, passed through to
     *                               "strip_tags()". Default: '' (empty string)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $plain_text = \strip_tags($str, $allowable_tags);

        return $plain_text;
    }
5476
5477
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break and is replaced once. "<br>" tags are matched
     * case-insensitively, with or without attributes / a closing slash.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - "\r\n" comes first, so that the pair is consumed as a single break
        // - "\b" keeps other tags that merely start with "br" untouched
        // - "[^>]*>" stops at the first ">" after "<br"
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5492
5493
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * The work is delegated to "ASCII::remove_invisible_characters()"; all arguments
     * are passed through unchanged.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        $visible_only = ASCII::remove_invisible_characters($str, $url_encoded, $replacement, $keep_basic_control_characters);

        return $visible_only;
    }
5530
5531
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix check is done byte-wise; the removal itself is done in characters
     * of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would ignore the prefix "0".
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5572
5573
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix check is done byte-wise; the removal itself is done in characters
     * of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would ignore the suffix "0".
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5611
5612
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Uses "str_replace()" for the case-sensitive mode and "UTF8::str_ireplace()" otherwise.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5637
5638
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * Uses "str_replace()" for the case-sensitive mode and "UTF8::str_ireplace()" otherwise.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5663
5664
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // An empty replacement drops invalid sequences ("none"). Otherwise they are turned
            // into U+FFFD first and the final str_replace() swaps them for $replacement_char,
            // which also works for multi-byte replacements (ord() would only see the first byte).
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the previous setting, even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (�)
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5723
5724
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list would end up as the invalid character class "[]",
        // so there is nothing to strip.
        if ($chars === '') {
            return $str;
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            $pattern = '[\\s]+$';
        } else {
            // only the preg fallback wraps the pattern in "/", so only there the delimiter is quoted
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = '[' . $quoted_chars . ']+$';
        }

        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5765
5766
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of self::$SUPPORT.
     * The markup is always returned and additionally echoed when $useEcho is true.
     *
     * @param bool $useEcho <p>Whether the generated markup should also be echoed. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated markup.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        // init
        $html = '<pre>';

        // iterate by value: nothing is modified, and a by-reference loop
        // would leave a dangling reference into the static array
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5793
5794
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // the ASCII check is only executed if ASCII chars should be kept at all
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5827
5828
    /**
     * Replace every run of $tab_length spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces which are replaced by one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $space_run = \str_repeat(' ', $tab_length);

        return \str_replace($space_run, "\t", $str);
    }
5848
5849
    /**
     * Alias for "UTF8::str_split()".
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        /** @var string[] $parts */
        $parts = self::str_split($str, $length, $clean_utf8);

        return $parts;
    }
5871
5872
    /**
     * Alias for "UTF8::str_starts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5889
5890
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);

        // drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_*" functions are only used if no language specific handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case one matched chunk; shared by both replacement passes below.
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($chunk)
                : \mb_strtoupper($chunk, $encoding);
        };

        // pass 1: remove the separators and upper-case the char which follows them (if there is one)
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // pass 2: upper-case each run of numbers together with the char which follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5983
5984
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then "str_capitalize_name_helper()" is
     * applied with ' ' and afterwards with '-'.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $space_pass = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_pass, '-');
    }
6005
6006
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // the native "str_contains()" only exists since PHP 8
        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
6036
6037
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles array or an empty needle always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Compare against '' explicitly: a truthiness check would reject the needle "0".
            if ($needle === '') {
                return false;
            }

            // exactly one lookup per needle, depending on the requested mode
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6076
6077
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles array results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains at least one of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Compare against '' explicitly: a truthiness check would skip the needle "0".
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6121
6122
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "UTF8::str_delimit()" with '-' as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
6138
6139
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // "mb_ereg_replace()" is used with native mbstring support, "preg_replace()" otherwise
        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        // step 1: put a dash in front of every uppercase char which is not at a word boundary
        $trimmed = \trim($str);
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lowercase the string
        if ($lang === null && !$try_to_keep_the_string_length && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: replace every run of dashes, underscores and whitespace with the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6192
6193
    /**
6194
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
6195
     *
6196
     * EXAMPLE: <code>
6197
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
6198
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
6199
     * </code>
6200
     *
6201
     * @param string $str <p>The input string.</p>
6202
     *
6203
     * @psalm-pure
6204
     *
6205
     * @return false|string
6206
     *                      <p>
6207
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
6208
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
6209
     *                      </p>
6210
     */
6211
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) binary-looking input (UTF-16 / UTF-32 / PDF / images / ...)
        //
        if (self::is_binary($str, true)) {
            // 1 => little endian, 2 => big endian
            $utf32_result = self::is_utf32($str, false);
            if ($utf32_result === 1 || $utf32_result === 2) {
                return $utf32_result === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($str, false);
            if ($utf16_result === 1 || $utf16_result === 2) {
                return $utf16_result === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) pure ASCII
        //
        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) valid UTF-8
        //
        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) ask "mb_detect_encoding()" (strict mode) with a fixed candidate order
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) last resort: accept the first known encoding for which an
        //     "iconv()" round-trip leaves the bytes untouched
        //
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
6323
6324
    /**
6325
     * alias for "UTF8::str_ends_with()"
6326
     *
6327
     * @param string $haystack
6328
     * @param string $needle
6329
     *
6330
     * @psalm-pure
6331
     *
6332
     * @return bool
6333
     *
6334
     * @see        UTF8::str_ends_with()
6335
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
6336
     */
6337
    public static function str_ends(string $haystack, string $needle): bool
    {
        // Deprecated alias: the check itself lives in str_ends_with().
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
6341
6342
    /**
6343
     * Check if the string ends with the given substring.
6344
     *
6345
     * EXAMPLE: <code>
6346
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6347
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
6348
     * </code>
6349
     *
6350
     * @param string $haystack <p>The string to search in.</p>
6351
     * @param string $needle   <p>The substring to search for.</p>
6352
     *
6353
     * @psalm-pure
6354
     *
6355
     * @return bool
6356
     */
6357
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string - even an empty one - ends with the empty string
        if ($needle === '') {
            return true;
        }

        // an empty haystack cannot end with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // no native function yet: compare the haystack's trailing bytes
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
6374
6375
    /**
6376
     * Returns true if the string ends with any of $substrings, false otherwise.
6377
     *
6378
     * - case-sensitive
6379
     *
6380
     * @param string   $str        <p>The input string.</p>
6381
     * @param string[] $substrings <p>Substrings to look for.</p>
6382
     *
6383
     * @psalm-pure
6384
     *
6385
     * @return bool
6386
     *              <p>Whether or not $str ends with $substring.</p>
6387
     */
6388
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is modified, so a reference is not needed.
        // An empty list never runs the loop and falls through to "false".
        foreach ($substrings as $substring) {
            // Every string ends with the empty string (same contract as
            // str_ends_with()). Without this guard "substr($str, -0)" would be
            // evaluated, whose result for an empty $str differs between
            // PHP 7 (false) and PHP 8 ('').
            if ($substring === '') {
                return true;
            }

            // byte-wise, case-sensitive comparison of the trailing bytes
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
6402
6403
    /**
6404
     * Ensures that the string begins with $substring. If it doesn't, it's
6405
     * prepended.
6406
     *
6407
     * @param string $str       <p>The input string.</p>
6408
     * @param string $substring <p>The substring to add if not present.</p>
6409
     *
6410
     * @psalm-pure
6411
     *
6412
     * @return string
6413
     */
6414
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Byte-wise prefix check; an empty $substring never counts as "present".
        $already_prefixed = $substring !== ''
                            && \strncmp($str, $substring, \strlen($substring)) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
6426
6427
    /**
6428
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
6429
     *
6430
     * @param string $str       <p>The input string.</p>
6431
     * @param string $substring <p>The substring to add if not present.</p>
6432
     *
6433
     * @psalm-pure
6434
     *
6435
     * @return string
6436
     */
6437
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Nothing to append when the (non-empty) string already ends with the
        // (non-empty) substring - compared byte-wise.
        if (
            $str !== ''
            &&
            $substring !== ''
            &&
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return $str;
        }

        return $str . $substring;
    }
6451
6452
    /**
6453
     * Capitalizes the first word of the string, replaces underscores with
6454
     * spaces, and strips '_id'.
6455
     *
6456
     * @param string $str
6457
     *
6458
     * @psalm-pure
6459
     *
6460
     * @return string
6461
     */
6462
    public static function str_humanize($str): string
    {
        // Drop every "_id" occurrence first, then turn the remaining
        // underscores into spaces (same order as the array-based replace).
        $without_ids = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_ids);

        // trim the result and uppercase its first character
        return self::ucfirst(\trim($spaced));
    }
6478
6479
    /**
6480
     * alias for "UTF8::str_istarts_with()"
6481
     *
6482
     * @param string $haystack
6483
     * @param string $needle
6484
     *
6485
     * @psalm-pure
6486
     *
6487
     * @return bool
6488
     *
6489
     * @see        UTF8::str_istarts_with()
6490
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
6491
     */
6492
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // Deprecated alias: the check itself lives in str_istarts_with().
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
6496
6497
    /**
6498
     * alias for "UTF8::str_iends_with()"
6499
     *
6500
     * @param string $haystack
6501
     * @param string $needle
6502
     *
6503
     * @psalm-pure
6504
     *
6505
     * @return bool
6506
     *
6507
     * @see        UTF8::str_iends_with()
6508
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
6509
     */
6510
    public static function str_iends(string $haystack, string $needle): bool
    {
        // Deprecated alias: the check itself lives in str_iends_with().
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
6514
6515
    /**
6516
     * Check if the string ends with the given substring, case-insensitive.
6517
     *
6518
     * EXAMPLE: <code>
6519
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6520
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
6521
     * </code>
6522
     *
6523
     * @param string $haystack <p>The string to search in.</p>
6524
     * @param string $needle   <p>The substring to search for.</p>
6525
     *
6526
     * @psalm-pure
6527
     *
6528
     * @return bool
6529
     */
6530
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        // an empty haystack cannot end with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        // cut off as many trailing bytes as the needle has ...
        $tail = \substr($haystack, -\strlen($needle));

        // ... and compare them case-insensitively with the needle
        return self::strcasecmp($tail, $needle) === 0;
    }
6542
6543
    /**
6544
     * Returns true if the string ends with any of $substrings, false otherwise.
6545
     *
6546
     * - case-insensitive
6547
     *
6548
     * @param string   $str        <p>The input string.</p>
6549
     * @param string[] $substrings <p>Substrings to look for.</p>
6550
     *
6551
     * @psalm-pure
6552
     *
6553
     * @return bool
6554
     *              <p>Whether or not $str ends with $substring.</p>
6555
     */
6556
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // An empty list never enters the loop, so the result is "false".
        foreach ($substrings as $candidate) {
            // the first case-insensitive suffix match wins
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6570
6571
    /**
6572
     * Returns the index of the first occurrence of $needle in the string,
6573
     * and false if not found. Accepts an optional offset from which to begin
6574
     * the search.
6575
     *
6576
     * @param string $str      <p>The input string.</p>
6577
     * @param string $needle   <p>Substring to look for.</p>
6578
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6579
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6580
     *
6581
     * @psalm-pure
6582
     *
6583
     * @return false|int
6584
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6585
     *
6586
     * @see        UTF8::stripos()
6587
     * @deprecated <p>please use "UTF8::stripos()"</p>
6588
     */
6589
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to stripos().
        $position = self::stripos($str, $needle, $offset, $encoding);

        return $position;
    }
6602
6603
    /**
6604
     * Returns the index of the last occurrence of $needle in the string,
6605
     * and false if not found. Accepts an optional offset from which to begin
6606
     * the search. Offsets may be negative to count from the last character
6607
     * in the string.
6608
     *
6609
     * @param string $str      <p>The input string.</p>
6610
     * @param string $needle   <p>Substring to look for.</p>
6611
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6612
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6613
     *
6614
     * @psalm-pure
6615
     *
6616
     * @return false|int
6617
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6618
     *
6619
     * @see        UTF8::strripos()
6620
     * @deprecated <p>please use "UTF8::strripos()"</p>
6621
     */
6622
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to strripos().
        $position = self::strripos($str, $needle, $offset, $encoding);

        return $position;
    }
6635
6636
    /**
6637
     * Returns the index of the first occurrence of $needle in the string,
6638
     * and false if not found. Accepts an optional offset from which to begin
6639
     * the search.
6640
     *
6641
     * @param string $str      <p>The input string.</p>
6642
     * @param string $needle   <p>Substring to look for.</p>
6643
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6644
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6645
     *
6646
     * @psalm-pure
6647
     *
6648
     * @return false|int
6649
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6650
     *
6651
     * @see        UTF8::strpos()
6652
     * @deprecated <p>please use "UTF8::strpos()"</p>
6653
     */
6654
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to strpos().
        $position = self::strpos($str, $needle, $offset, $encoding);

        return $position;
    }
6667
6668
    /**
6669
     * Returns the index of the last occurrence of $needle in the string,
6670
     * and false if not found. Accepts an optional offset from which to begin
6671
     * the search. Offsets may be negative to count from the last character
6672
     * in the string.
6673
     *
6674
     * @param string $str      <p>The input string.</p>
6675
     * @param string $needle   <p>Substring to look for.</p>
6676
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6677
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6678
     *
6679
     * @psalm-pure
6680
     *
6681
     * @return false|int
6682
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6683
     *
6684
     * @see        UTF8::strrpos()
6685
     * @deprecated <p>please use "UTF8::strrpos()"</p>
6686
     */
6687
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to strrpos().
        $position = self::strrpos($str, $needle, $offset, $encoding);

        return $position;
    }
6700
6701
    /**
6702
     * Inserts $substring into the string at the $index provided.
6703
     *
6704
     * @param string $str       <p>The input string.</p>
6705
     * @param string $substring <p>String to be inserted.</p>
6706
     * @param int    $index     <p>The index at which to insert the substring.</p>
6707
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6708
     *
6709
     * @psalm-pure
6710
     *
6711
     * @return string
6712
     */
6713
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // fast path: call the mb-functions directly
            $length = (int) \mb_strlen($str);

            // an index behind the end of the string leaves it untouched
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        // generic path: same steps via the encoding-aware wrappers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
6742
6743
    /**
6744
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6745
     *
6746
     * EXAMPLE: <code>
6747
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6748
     * </code>
6749
     *
6750
     * @see http://php.net/manual/en/function.str-ireplace.php
6751
     *
6752
     * @param string|string[] $search      <p>
6753
     *                                     Every replacement with search array is
6754
     *                                     performed on the result of previous replacement.
6755
     *                                     </p>
6756
     * @param string|string[] $replacement <p>The replacement.</p>
6757
     * @param string|string[] $subject     <p>
6758
     *                                     If subject is an array, then the search and
6759
     *                                     replace is performed with every entry of
6760
     *                                     subject, and the return value is an array as
6761
     *                                     well.
6762
     *                                     </p>
6763
     * @param int             $count       [optional] <p>
6764
     *                                     The number of matched and replaced needles will
6765
     *                                     be returned in count which is passed by
6766
     *                                     reference.
6767
     *                                     </p>
6768
     *
6769
     * @psalm-pure
6770
     *
6771
     * @return string|string[]
6772
     *                         <p>A string or an array of replacements.</p>
6773
     *
6774
     * @template TStrIReplaceSubject
6775
     * @phpstan-param TStrIReplaceSubject $subject
6776
     * @phpstan-return TStrIReplaceSubject
6777
     */
6778
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Turn every needle into a quoted, case-insensitive ("i"), unicode-aware
        // ("u") pattern. A fresh array is built so that no foreach reference
        // is left dangling afterwards.
        $patterns = [];
        foreach ((array) $search as $needle) {
            $needle = (string) $needle;
            $patterns[] = $needle === ''
                // an empty needle must never match: this pattern cannot match anything
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // Like "str_ireplace()", the replacement is meant literally. Because it
        // is handed to "preg_replace()", "\" and "$" have to be escaped,
        // otherwise e.g. "$10" or "\1" would be treated as backreferences.
        $escape_replacement = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        $replacement = \is_array($replacement)
            ? \array_map($escape_replacement, $replacement)
            : $escape_replacement($replacement);

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6811
    /**
6812
     * Replaces $search from the beginning of string with $replacement.
6813
     *
6814
     * @param string $str         <p>The input string.</p>
6815
     * @param string $search      <p>The string to search for.</p>
6816
     * @param string $replacement <p>The replacement.</p>
6817
     *
6818
     * @psalm-pure
6819
     *
6820
     * @return string
6821
     *                <p>The string after the replacement.</p>
6822
     */
6823
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Existing contract for an empty $search: the replacement is appended
        // to the input (this also covers an empty input string).
        if ($search === '') {
            return $str . $replacement;
        }

        // a non-empty $search can never match an empty string
        if ($str === '') {
            return '';
        }

        // Compare as many leading *characters* as $search has, using unicode
        // case folding. The byte-wise "strncasecmp()" only folds ASCII
        // letters, so e.g. "Ä" would never match "ä".
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $head = (string) \mb_substr($str, 0, $search_length, 'UTF-8');

        if (\mb_stripos($head, $search, 0, 'UTF-8') === 0) {
            return $replacement . (string) \mb_substr($str, $search_length, null, 'UTF-8');
        }

        return $str;
    }
6846
6847
    /**
6848
     * Replaces $search from the ending of string with $replacement.
6849
     *
6850
     * @param string $str         <p>The input string.</p>
6851
     * @param string $search      <p>The string to search for.</p>
6852
     * @param string $replacement <p>The replacement.</p>
6853
     *
6854
     * @psalm-pure
6855
     *
6856
     * @return string
6857
     *                <p>The string after the replacement.</p>
6858
     */
6859
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Existing contract for an empty $search: the replacement is appended
        // to the input (this also covers an empty input string).
        if ($search === '') {
            return $str . $replacement;
        }

        // a non-empty $search can never match an empty string
        if ($str === '') {
            return '';
        }

        $str_length = (int) \mb_strlen($str, 'UTF-8');
        $search_length = (int) \mb_strlen($search, 'UTF-8');

        // A needle longer than the input cannot match. Checking this up front
        // avoids the out-of-range negative offset that the former "stripos()"
        // call produced (warning on PHP 7, ValueError on PHP 8).
        if ($search_length > $str_length) {
            return $str;
        }

        // Compare the trailing *characters* using unicode case folding instead
        // of the ASCII-only byte comparison.
        $cut_position = $str_length - $search_length;
        $tail = (string) \mb_substr($str, $cut_position, null, 'UTF-8');

        if (\mb_stripos($tail, $search, 0, 'UTF-8') === 0) {
            return (string) \mb_substr($str, 0, $cut_position, 'UTF-8') . $replacement;
        }

        return $str;
    }
6881
6882
    /**
6883
     * Check if the string starts with the given substring, case-insensitive.
6884
     *
6885
     * EXAMPLE: <code>
6886
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6887
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6888
     * </code>
6889
     *
6890
     * @param string $haystack <p>The string to search in.</p>
6891
     * @param string $needle   <p>The substring to search for.</p>
6892
     *
6893
     * @psalm-pure
6894
     *
6895
     * @return bool
6896
     */
6897
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // every string starts with the empty string
        if ($needle === '') {
            return true;
        }

        // an empty haystack cannot start with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        // a case-insensitive hit at position 0 means "starts with"
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
6909
6910
    /**
6911
     * Returns true if the string begins with any of $substrings, false otherwise.
6912
     *
6913
     * - case-insensitive
6914
     *
6915
     * @param string $str        <p>The input string.</p>
6916
     * @param array  $substrings <p>Substrings to look for.</p>
6917
     *
6918
     * @psalm-pure
6919
     *
6920
     * @return bool
6921
     *              <p>Whether or not $str starts with $substring.</p>
6922
     */
6923
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // an empty input never matches, whatever the candidates are
        if ($str === '') {
            return false;
        }

        // An empty list never enters the loop, so the result is "false".
        foreach ($substrings as $candidate) {
            // the first case-insensitive prefix match wins
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6941
6942
    /**
6943
     * Gets the substring after the first occurrence of a separator.
6944
     *
6945
     * @param string $str       <p>The input string.</p>
6946
     * @param string $separator <p>The string separator.</p>
6947
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6948
     *
6949
     * @psalm-pure
6950
     *
6951
     * @return string
6952
     */
6953
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to split, or nothing to split on
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator (case-insensitive) in the SAME encoding that is
        // used for slicing below, so the offset and the cut position agree.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: everything behind the separator via the mb-functions
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6981
6982
    /**
6983
     * Gets the substring after the last occurrence of a separator.
6984
     *
6985
     * @param string $str       <p>The input string.</p>
6986
     * @param string $separator <p>The string separator.</p>
6987
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6988
     *
6989
     * @psalm-pure
6990
     *
6991
     * @return string
6992
     */
6993
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to split, or nothing to split on
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the last separator (case-insensitive) in the SAME encoding that
        // is used for slicing below, so the offset and the cut position agree.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: everything behind the separator
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7021
7022
    /**
7023
     * Gets the substring before the first occurrence of a separator.
7024
     *
7025
     * @param string $str       <p>The input string.</p>
7026
     * @param string $separator <p>The string separator.</p>
7027
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7028
     *
7029
     * @psalm-pure
7030
     *
7031
     * @return string
7032
     */
7033
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to split, or nothing to split on
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator (case-insensitive) in the SAME encoding that is
        // used for slicing below, so the offset and the cut position agree.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: everything in front of the separator
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7053
7054
    /**
7055
     * Gets the substring before the last occurrence of a separator.
7056
     *
7057
     * @param string $str       <p>The input string.</p>
7058
     * @param string $separator <p>The string separator.</p>
7059
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7060
     *
7061
     * @psalm-pure
7062
     *
7063
     * @return string
7064
     */
7065
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to split, or nothing to split on
        if ($separator === '' || $str === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the last case-insensitive occurrence of the separator;
        // UTF-8 uses the mb-function directly, everything else the wrapper
        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        // keep everything in front of that occurrence
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7090
7091
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * The lookup itself is delegated to "UTF8::stristr()"; this wrapper only turns
     * the "not found" result (false) and empty arguments into an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matched part, or an empty string if nothing was found.</p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
7129
7130
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * The lookup itself is delegated to "UTF8::strrichr()"; this wrapper only turns
     * the "not found" result (false) and empty arguments into an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matched part, or an empty string if nothing was found.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
7168
7169
    /**
     * Returns the trailing $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the end. Default: 1</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The last $n characters, or an empty string if $str is empty or $n is not positive.</p>
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Any other charset name is normalized first and handled by the class helper.
            $normalized = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized);
        }

        // A negative start offset makes mb_substr() count from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
7197
7198
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned unchanged. Longer strings are
     * cut so that the kept part plus $str_add_on is $length characters long. If $str_add_on
     * alone is longer than $length, only $str_add_on is returned.
     *
     * EXAMPLE: <code>UTF8::str_limit('fòô bàř', 5); // 'fòô …'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The (possibly shortened) string, or an empty string if $str is empty or $length is not positive.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Clamp at zero: a negative length would make mb_substr() cut from the end
            // of the string and return far more than $length characters.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string it is appended to.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7237
7238
    /**
     * Limit the number of characters in a string without cutting inside a word where possible.
     *
     * The string is cut at $length characters and the trailing, incomplete word is dropped.
     * If no space is available to cut at, the string is cut hard at $length - 1 characters.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shortened string followed by $str_add_on, or $str itself if it already fits.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit falls exactly on a space: cut right before it.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);

            // A negative limit makes explode() drop the last (incomplete) word.
            $whole_words = \implode(' ', \explode(' ', $truncated, -1));
            if ($whole_words === '') {
                return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
            }

            return $whole_words . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right before it.
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return $str_add_on;
        }

        // A negative limit makes explode() drop the last (incomplete) word.
        $whole_words = \implode(' ', \explode(' ', $truncated, -1));
        if ($whole_words === '') {
            return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $whole_words . $str_add_on;
    }
7306
7307
    /**
     * Returns the longest common prefix of $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared leading characters, or an empty string if there are none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // The prefix can never be longer than the shorter of both strings.
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            for ($pos = 0; $pos < $limit; ++$pos) {
                $char = \mb_substr($str1, $pos, 1);
                if ($char === false || $char !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $char;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The prefix can never be longer than the shorter of both strings.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $char = self::substr($str1, $pos, 1, $encoding);
            if ($char === false || $char !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
7370
7371
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Decided after normalization, so aliases that normalize to 'UTF-8' use mbstring directly as well.
        $use_mb = $encoding === 'UTF-8';

        // Extract the characters of $str2 once (1-based) instead of once per table cell.
        $other_chars = [];
        for ($j = 1; $j <= $other_length; ++$j) {
            $other_chars[$j] = $use_mb
                ? \mb_substr($str2, $j - 1, 1)
                : self::substr($str2, $j - 1, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based position in $str1 where that match ends

        // Each cell depends only on the previous row, so two rolling rows
        // replace the full ($str_length + 1) x ($other_length + 1) table.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $use_mb
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0 => 0];
            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j]) {
                    $run = $previous_row[$j - 1] + 1;
                    $current_row[$j] = $run;

                    // Strictly greater: on ties the earliest match wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7461
7462
    /**
     * Returns the longest common suffix of $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared trailing characters, or an empty string if there are none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // The suffix can never be longer than the shorter of both strings.
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the end; negative offsets count from the last character.
            for ($back = 1; $back <= $limit; ++$back) {
                $char = \mb_substr($str1, -$back, 1);
                if ($char === false || $char !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The suffix can never be longer than the shorter of both strings.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // Walk backwards from the end; negative offsets count from the last character.
        for ($back = 1; $back <= $limit; ++$back) {
            $char = self::substr($str1, -$back, 1, $encoding);
            if ($char === false || $char !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
        }

        return $suffix;
    }
7528
7529
    /**
     * Checks whether $str matches the given regular expression.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u" (UTF-8) modifier,
     * so it must be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
7544
7545
    /**
     * Tells whether a character exists at the given index. Negative offsets
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Valid indexes are 0 .. count-1 from the front and -1 .. -count from the back.
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
7570
7571
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure with the same encoding that is used to fetch the character below,
        // otherwise the bounds check and the lookup can disagree.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7603
7604
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right' or 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string, or $str unchanged if it is already longer than $pad_length.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // Pick the length / cut primitives once: mbstring for plain UTF-8,
        // the encoding-aware class helpers for everything else.
        if ($encoding === 'UTF-8') {
            $measure = static function (string $text): int {
                return (int) \mb_strlen($text);
            };
            $take = static function (string $text, int $chars): string {
                return (string) \mb_substr($text, 0, $chars);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $measure = static function (string $text) use ($encoding): int {
                return (int) self::strlen($text, $encoding);
            };
            $take = static function (string $text, int $chars) use ($encoding): string {
                return (string) self::substr($text, 0, $chars, $encoding);
            };
        }

        $str_length = $measure($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        // Number of characters that have to be added in total.
        $missing = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_LEFT) {
            $repeats = (int) \ceil($missing / $measure($pad_string));

            return $take(\str_repeat($pad_string, $repeats), $missing) . $str;
        }

        if ($pad_type === \STR_PAD_BOTH) {
            // With an odd amount the extra character goes to the right side.
            $left = (int) \floor($missing / 2);
            $right = (int) \ceil($missing / 2);

            return $take(\str_repeat($pad_string, $left), $left)
                . $str
                . $take(\str_repeat($pad_string, $right), $right);
        }

        // STR_PAD_RIGHT and any unknown integer value.
        $repeats = (int) \ceil($missing / $measure($pad_string));

        return $str . $take(\str_repeat($pad_string, $repeats), $missing);
    }
7771
7772
    /**
     * Pads both sides of the string up to the given length.
     * Shortcut for "UTF8::str_pad()" with STR_PAD_BOTH as $pad_type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7800
7801
    /**
     * Pads the beginning of the string up to the given length.
     * Shortcut for "UTF8::str_pad()" with STR_PAD_LEFT as $pad_type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7829
7830
    /**
     * Pads the end of the string up to the given length.
     * Shortcut for "UTF8::str_pad()" with STR_PAD_RIGHT as $pad_type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7858
7859
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7888
7889
    /**
7890
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
7891
     *
7892
     * Replace all occurrences of the search string with the replacement string
7893
     *
7894
     * @see http://php.net/manual/en/function.str-replace.php
7895
     *
7896
     * @param string|string[] $search  <p>
7897
     *                                 The value being searched for, otherwise known as the needle.
7898
     *                                 An array may be used to designate multiple needles.
7899
     *                                 </p>
7900
     * @param string|string[] $replace <p>
7901
     *                                 The replacement value that replaces found search
7902
     *                                 values. An array may be used to designate multiple replacements.
7903
     *                                 </p>
7904
     * @param string|string[] $subject <p>
7905
     *                                 The string or array of strings being searched and replaced on,
7906
     *                                 otherwise known as the haystack.
7907
     *                                 </p>
7908
     *                                 <p>
7909
     *                                 If subject is an array, then the search and
7910
     *                                 replace is performed with every entry of
7911
     *                                 subject, and the return value is an array as
7912
     *                                 well.
7913
     *                                 </p>
7914
     * @param int|null        $count   [optional] <p>
7915
     *                                 If passed, this will hold the number of matched and replaced needles.
7916
     *                                 </p>
7917
     *
7918
     * @psalm-pure
7919
     *
7920
     * @return string|string[]
7921
     *                         <p>This function returns a string or an array with the replaced values.</p>
7922
     *
7923
     * @template TStrReplaceSubject
7924
     * @phpstan-param TStrReplaceSubject $subject
7925
     * @phpstan-return TStrReplaceSubject
7926
     *
7927
     * @deprecated please use \str_replace() instead
7928
     */
7929
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // Thin pass-through to the native function; $count is forwarded by
        // reference so the caller still receives the number of replacements.
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7948
7949
    /**
     * Swaps a leading occurrence of $search for $replacement.
     *
     * The comparison is byte-wise and case-sensitive, and only a match at the
     * very start of $str is replaced. An empty $search never matches; in that
     * case $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The prefix to look for.</p>
     * @param string $replacement <p>The text that takes the place of the prefix.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string, or $str unchanged if it does not start with $search.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // Empty needle: plain concatenation. For an empty input this yields
        // just the replacement (or '' when that is empty as well).
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_bytes = \strlen($search);
        $has_prefix = \strncmp($str, $search, $prefix_bytes) === 0;

        return $has_prefix
            ? $replacement . \substr($str, $prefix_bytes)
            : $str;
    }
7987
7988
    /**
     * Swaps a trailing occurrence of $search for $replacement.
     *
     * The comparison is byte-wise and case-sensitive, and only a match at the
     * very end of $str is replaced. An empty $search never matches; in that
     * case $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The suffix to look for.</p>
     * @param string $replacement <p>The text that takes the place of the suffix.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string, or $str unchanged if it does not end with $search.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // Empty needle: plain concatenation. For an empty input this yields
        // just the replacement (or '' when that is empty as well).
        if ($search === '') {
            return $str . $replacement;
        }

        $str_bytes = \strlen($str);
        $search_bytes = \strlen($search);

        // A suffix longer than the input can never match. Bail out before any
        // offset arithmetic: the former strpos() call received an out-of-range
        // negative offset here (ValueError on PHP 8, warning on PHP 7).
        if ($search_bytes > $str_bytes) {
            return $str;
        }

        if (\substr($str, -$search_bytes) !== $search) {
            return $str;
        }

        return \substr($str, 0, $str_bytes - $search_bytes) . $replacement;
    }
8025
8026
    /**
     * Replaces only the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The position is looked up via self::strpos() and the splice is done by
     * self::substr_replace(); when nothing is found, $subject is returned as is.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The term to insert instead.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);
        if ($first_pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, $search_length);
    }
8060
8061
    /**
     * Replaces only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The position is looked up via self::strrpos() and the splice is done by
     * self::substr_replace(); when nothing is found, $subject is returned as is.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The term to insert instead.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);
        if ($last_pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, $search_length);
    }
8094
8095
    /**
     * Returns the characters of the string in random order.
     *
     * INFO: relies on \shuffle(), which is not suitable for cryptographic purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $result = '';

        if ($encoding === 'UTF-8') {
            // Shuffle the character positions, then rebuild the string in that order.
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($positions);

            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }

            return $result;
        }

        // Any other charset goes through the class's own encoding-aware helpers.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
8144
8145
    /**
     * Extracts the part of the string from index $start up to, but excluding,
     * index $end. Without $end the rest of the string is taken; a negative
     * $end is counted from the end of the string.
     *
     * A non-negative $end that is not greater than $start yields an empty string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring, as produced by \mb_substr() for 'UTF-8'
     *                      and by self::substr() for every other encoding.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // Decide on the code path BEFORE normalizing: only the literal 'UTF-8'
        // uses the bare mb_* functions.
        $use_native = $encoding === 'UTF-8';
        if (!$use_native) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end !== null && $end >= 0) {
            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        } else {
            // open-ended or negative $end: the total character count is needed
            $total = $use_native
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            $length = $end === null ? $total : $total + $end - $start;
        }

        if ($use_native) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
8196
8197
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become underscores, upper-case letters are
     * lower-cased and prefixed with "_", ASCII digits are wrapped in "_",
     * and runs of "_" are collapsed and trimmed from both ends.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $normalized = \str_replace('-', '_', self::normalize_whitespace($str));

        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // NOTE(review): inside the character class the "|" is a literal pipe,
        // so pipes get the "_" prefix as well - confirm that this is intended.
        $marked = (string) \preg_replace_callback(
            '/([\\p{N}|\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $char = $matches[1];

                // Only a plain ASCII digit survives the int round-trip unchanged.
                if ((string) (int) $char === $char) {
                    return '_' . $char . '_';
                }

                $lowered = $encoding === 'UTF-8'
                    ? \mb_strtolower($char)
                    : self::strtolower($char, $encoding);

                return '_' . $lowered;
            },
            $normalized
        );

        $collapsed = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $marked
        );

        // trim leading & trailing "_" + whitespace
        return \trim(\trim($collapsed, '_'));
    }
8266
8267
    /**
     * Sort all characters according to code points.
     *
     * The string is turned into code points via self::codepoints(), sorted,
     * and turned back into a string via self::string().
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicate values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8297
8298
    /**
     * Splits every entry of an array into chunks of Unicode characters.
     *
     * Each value is passed to self::str_split(); the keys of $input are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        foreach ($input as $key => $value) {
            $input[$key] = self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        /** @var string[][] $input */
        return $input;
    }
8336
8337
    /**
     * Convert a string to an array of unicode characters.
     *
     * Strategy, in order of preference: \mb_str_split() (when mbstring is
     * supported and the function exists), \mb_substr() / a "/./us" regex,
     * the regex alone (when only PCRE-UTF8 is supported), and finally a
     * manual byte-level UTF-8 decoder.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input;
     *                  empty for a non-positive $length or an empty input.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        $input = (string) $input;
        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $native_chunks = \mb_str_split($input, $length);
                if ($native_chunks !== false) {
                    // already chunked by $length, nothing left to do
                    return $native_chunks;
                }
            }

            $char_count = \mb_strlen($input);
            if ($char_count > 127) {
                $matches = [];
                \preg_match_all('/./us', $input, $matches);
                $chars = $matches[0] ?? [];
            } else {
                $chars = [];
                for ($i = 0; $i < $char_count; ++$i) {
                    $chars[] = \mb_substr($input, $i, 1);
                }
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $input, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // fallback: walk the bytes and group them by the UTF-8 lead-byte
            // pattern; a sequence with a bad continuation byte is not emitted

            $chars = [];
            $byte_count = \strlen($input);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $byte_count; ++$i) {
                $lead = $input[$i];

                if (($lead & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $chars[] = $lead;
                } elseif (isset($input[$i + 1]) && ($lead & "\xE0") === "\xC0") {
                    // 110xxxxx: two-byte sequence
                    if (($input[$i + 1] & "\xC0") === "\x80") {
                        $chars[] = $lead . $input[$i + 1];

                        ++$i;
                    }
                } elseif (isset($input[$i + 2]) && ($lead & "\xF0") === "\xE0") {
                    // 1110xxxx: three-byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $input[$i + 1] . $input[$i + 2];

                        $i += 2;
                    }
                } elseif (isset($input[$i + 3]) && ($lead & "\xF8") === "\xF0") {
                    // 11110xxx: four-byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue every $length single characters back together
            return \array_map(
                static function (array $group): string {
                    return \implode('', $group);
                },
                \array_chunk($chars, $length)
            );
        }

        $starts_with_empty = isset($chars[0]) && $chars[0] === '';

        return $starts_with_empty ? [] : $chars;
    }
8497
8498
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * With mbstring support the pattern is handed to \mb_split(); otherwise it
     * is quoted and handed to \preg_split().
     * NOTE(review): the preg fallback therefore treats $pattern as a literal
     * string, while \mb_split() treats it as a regular expression - confirm
     * which of the two is intended.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings; empty when $limit is 0 or the split fails,
     *                  and [$str] when $pattern is empty.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // \mb_split() reports failure with false; mirror the preg branch
            // below instead of iterating / returning a non-array.
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit parts (the rest is dropped)
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // Ask preg_split() for one part more than wanted, so that the unsplit
        // remainder ends up in an extra element that can be dropped afterwards.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($parts === false) {
            return [];
        }

        if ($limit > 0 && \count($parts) === $limit) {
            \array_pop($parts);
        }

        return $parts;
    }
8562
8563
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is case-sensitive. An empty $needle always matches;
     * an empty $haystack only matches an empty $needle.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // before PHP 8 there is no native str_starts_with(): compare the leading bytes
        if (\PHP_VERSION_ID < 80000) {
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
8595
8596
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
8627
8628
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // skip the separator itself
            return (string) \mb_substr($str, $offset + (int) \mb_strlen($separator));
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Go through self::substr() - as the sibling *_separator() methods do -
        // instead of handing the caller's raw encoding name to \mb_substr().
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8669
8670
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $last_pos = \mb_strrpos($str, $separator);
            if ($last_pos === false) {
                return '';
            }

            // skip the separator itself
            $tail_start = $last_pos + (int) \mb_strlen($separator);

            return (string) \mb_substr($str, $tail_start);
        }

        $last_pos = self::strrpos($str, $separator, 0, $encoding);
        if ($last_pos === false) {
            return '';
        }

        $tail_start = $last_pos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $tail_start, null, $encoding);
    }
8714
8715
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $first_pos = \mb_strpos($str, $separator);

            return $first_pos === false
                ? ''
                : (string) \mb_substr($str, 0, $first_pos);
        }

        $first_pos = self::strpos($str, $separator, 0, $encoding);
        if ($first_pos === false) {
            return '';
        }

        return (string) self::substr($str, 0, $first_pos, $encoding);
    }
8760
8761
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $last_pos = \mb_strrpos($str, $separator);

            return $last_pos === false
                ? ''
                : (string) \mb_substr($str, 0, $last_pos);
        }

        // the lookup receives the encoding as given; it is normalized only
        // afterwards, for the final substr call
        $last_pos = self::strrpos($str, $separator, 0, $encoding);
        if ($last_pos === false) {
            return '';
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $last_pos, $normalized_encoding);
    }
8805
8806
    /**
     * Returns the part of "$str" starting at the first occurrence of "$needle"
     * (needle included), or - with "$before_needle" set - the part in front of it.
     *
     * An empty string is returned for empty input and when the needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        // "mb_strstr()" defaults its third argument to false, so the flag can
        // simply be forwarded in both cases
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8852
8853
    /**
     * Returns the part of "$str" starting at the last occurrence of "$needle"
     * (needle included), or - with "$before_needle" set - the part in front of it.
     *
     * An empty string is returned for empty input and when the needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        // "mb_strrchr()" defaults its third argument to false, so the flag can
        // simply be forwarded in both cases
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8899
8900
    /**
     * Wraps "$str" into "$substring", i.e. puts the substring in front of and
     * behind the input.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        $parts = [$substring, $str, $substring];

        return \implode('', $parts);
    }
8915
8916
    /**
     * Returns a (by default trimmed) string with the first letter of each word
     * capitalized and the remaining letters of the word lower-cased.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that is neither whitespace nor one of
     * the chars from "$word_define_chars".
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that are treated like
     *                                                           whitespace, i.e. as additional word separators.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only good enough if no language specific
        // or length preserving case mapping was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare explicitly instead of relying on truthiness: the string "0" is
        // falsy in PHP and would otherwise be silently dropped as a separator.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // words from the ignore list are passed through untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
9013
9014
    /**
     * Converts a string into an obfuscated string by replacing randomly chosen
     * characters with "$obfuscateChar".
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('user@example.org', 0.5, '*', ['@', '.']); // e.g. "u**r@*x*mp*e.o*g"
     * </code>
     *
     * NOTE: The characters are picked via "random_int()", so the result differs
     * between calls. Characters listed in "$keepChars" are never replaced, but
     * they still count as "processed" when they get picked.
     *
     * @param string   $str
     * @param float    $percent       <p>Fraction of the characters to process, e.g. 0.5 for 50%. The resulting
     *                                amount is capped at the number of characters that can be processed, so
     *                                values above 1.0 behave like 1.0.</p>
     * @param string   $obfuscateChar <p>The replacement character.</p>
     * @param string[] $keepChars     <p>Characters that must stay readable.</p>
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Temporarily swap occurrences of the obfuscate char that are already part
        // of the input, so they can be told apart from freshly obfuscated chars.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);

        // Only chars that differ from the obfuscate char can ever be processed.
        $charsEligible = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsEligible;
            }
        }

        // Cap the target at what is reachable: without the cap the loop below
        // never terminates, e.g. for "$percent" > 1.0.
        $charsMaxChange = \min(\round(\count($chars) * $percent), $charsEligible);
        $charsCounter = 0;
        $charKeyDone = [];

        // Sweep over the chars again and again, picking each not yet processed
        // char with a probability of roughly 50%, until the target is reached.
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if ($charsCounter >= $charsMaxChange) {
                    break;
                }

                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                // protected chars count as processed but stay readable
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        // restore the obfuscate chars that were part of the original input
        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
9080
9081
    /**
     * Returns a trimmed string in proper title case.
     *
     * Small words (a, an, and, of, ...) stay lower-case unless they start or end
     * the title or a sub-phrase; URLs, e-mail addresses, file paths and words
     * with internal capitals (e.g. "iPhone") are left untouched.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are inserted into the regular expressions as-is.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex alternation of all "small words", extended by the caller's list
        $small_words_rx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-cases the first char of capture group 1.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        /**
         * Keeps capture group 1 and upper-cases the first char of capture group 2.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        $title = \trim($str);

        // an all-caps input is lower-cased first, so that the rules below can work
        if (!self::has_lowercase($title)) {
            $title = self::strtolower($title, $encoding);
        }

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $title = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = static::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // leading (group 1) and trailing (group 6) underscores are kept
                return $matches[1] . $word . $matches[6];
            },
            $title
        );

        // Exceptions for small words: capitalize at start of title...
        $title = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_group_two,
            $title
        );

        // ...and end of title
        $title = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $title
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $title = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $title
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $title = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_group_two,
            $title
        );

        return $title;
    }
9272
9273
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the input are written as one big binary number, i.e. leading
     * zero bits are dropped and an input without any set bit (including the
     * empty string) results in "0".
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Translate every hex digit into its 4 bits. "base_convert()" is not used
        // here, because it works with floats internally and therefore returns
        // wrong bits for everything that does not fit into a native integer.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            (string) $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // keep the number-like output format: no leading zeros, but at least "0"
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9296
9297
    /**
     * Splits a string into its lines.
     *
     * The input is split at every run of one or two line-break characters
     * ("\r" and / or "\n"), so "\r\n" - but also e.g. "\n\n" - counts as one
     * single separator.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        // splitting failed -> behave like for an empty input
        if ($lines === false) {
            return $remove_empty_values ? [] : [''];
        }

        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
9337
9338
    /**
     * Convert a string into an array of words.
     *
     * The input is split at the words, and the words themselves are kept in the
     * result, so by default the returned array alternates between "everything
     * between two words" and "word".
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // regex class for "word chars": letters plus the extra chars from the caller
        $char_list = self::rxClass($char_list, '\pL');

        // words may contain dashes / apostrophes between two runs of word chars
        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $remove_empty_values ? [] : [''];
        }

        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        $words = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that really every value is a string
        foreach ($words as $key => $word) {
            $words[$key] = (string) $word;
        }

        return $words;
    }
9389
9390
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are handed over to "UTF8::to_ascii()" unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9411
9412
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If no room is left for any character of the input (the substring is at
     * least as long as "$length", or "$length" is not positive), only the
     * substring is returned - same as in "UTF8::str_truncate_safe()".
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // nothing to do, if the input is short enough already
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // A negative length would make "mb_substr()" count from the end of the
            // string and return (nearly) the whole input instead of truncating it.
            if ($length < 0) {
                $length = 0;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // nothing to do, if the input is short enough already
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // see above: never hand a negative length over to the substr function
        if ($length < 0) {
            $length = 0;
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9472
9473
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * Only the substring is returned, if the input is empty, "$length" is not
     * positive or the substring leaves no room for the input.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>true === a text that does not contain any
     *                                                       space within the allowed length is cut in the middle
     *                                                       of the word, instead of being removed completely.
     *                                                       Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve the room that is needed for the substring
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $cut - needed for PhpStan (stubs error) */
            $cut = \mb_substr($str, 0, $length);
            if ($cut === false) {
                return '';
            }

            // the cut is clean, if the input continues with a space right after it
            $next_space = \mb_strpos($str, ' ', $length - 1);
            if ($next_space !== $length) {
                // otherwise go back to the last space in front of the cut
                $previous_space = \mb_strrpos($cut, ' ', 0);

                $drop_partial_word = $previous_space !== false
                                     ||
                                     (
                                         $next_space !== false
                                         &&
                                         !$ignore_do_not_split_words_for_one_word
                                     );
                if ($drop_partial_word) {
                    $cut = (string) \mb_substr($cut, 0, (int) $previous_space);
                }
            }

            return $cut . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve the room that is needed for the substring
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $cut = self::substr($str, 0, $length, $encoding);
        if ($cut === false) {
            return '';
        }

        // the cut is clean, if the input continues with a space right after it
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space !== $length) {
            // otherwise go back to the last space in front of the cut
            $previous_space = self::strrpos($cut, ' ', 0, $encoding);

            $drop_partial_word = $previous_space !== false
                                 ||
                                 (
                                     $next_space !== false
                                     &&
                                     !$ignore_do_not_split_words_for_one_word
                                 );
            if ($drop_partial_word) {
                $cut = (string) self::substr($cut, 0, (int) $previous_space, $encoding);
            }
        }

        return $cut . $substring;
    }
9579
9580
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
9597
9598
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The input is camelized via "UTF8::str_camelize()", afterwards the first
     * character is upper-cased via "UTF8::ucfirst()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // only the encoding is used for the camelize step, the other options
        // are applied while upper-casing the first char
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9625
9626
    /**
     * Alias for "UTF8::ucfirst()": every argument is passed through unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
9657
9658
    /**
     * Count the words of a string, or return the words themselves.
     *
     * The string is split with UTF8::str_to_words(); the words are read from
     * the odd indexes of that result, the separators from the even indexes.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the list of words (format 1 / 2).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: words keyed by their character offset in the input
        if ($format === 2) {
            $words = [];
            // the first part is the separator in front of the first word
            $position = (int) self::strlen($parts[0]);
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[$position] = $parts[$idx];
                // skip the word itself plus the separator that follows it
                $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
            }

            return $words;
        }

        // any other format: only the number of words
        return (int) (($total - 1) / 2);
    }
9715
9716
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are
     * case-folded with UTF8::strtocasefold() before they are compared.
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
9758
9759
    /**
     * Alias for "UTF8::strstr()": every argument is passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9790
9791
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings compare equal right away; otherwise both strings are
     * brought into Unicode normalization form D (NFD) and compared byte-wise,
     * so canonically equivalent strings compare as equal.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false when the input cannot be
        // normalized (e.g. malformed UTF-8). Handing that bool to \strcmp()
        // is a TypeError under strict types and an empty string otherwise,
        // so fall back to comparing the raw bytes of that string instead.
        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }
9817
9818
    /**
     * Find length of initial segment not matching mask.
     *
     * When $offset and / or $length are given, only that part of the string
     * is inspected. An empty $char_list short-circuits before that and yields
     * the length of the complete input string.
     *
     * @param string   $str       <p>The input string.</p>
     * @param string   $char_list <p>The characters that end the initial segment.</p>
     * @param int      $offset    [optional] <p>Start position of the inspected part.</p>
     * @param int|null $length    [optional] <p>Length of the inspected part, null === until the end.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of characters in front of the first character from $char_list.</p>
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // nothing can match an empty mask
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // cut out the requested part of the string first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $window = self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $window = \mb_substr($str, $offset, $length);
            } else {
                $window = \mb_substr($str, $offset);
            }

            if ($window === false) {
                return 0;
            }

            $str = $window;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything in front of the first mask character
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no mask character at all: the whole string is the segment
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9881
9882
    /**
     * Alias for "UTF8::stristr()": every argument is passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9913
9914
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every value is turned into a numeric HTML entity ("&#<int>;") and the
     * resulting entity string is decoded via UTF8::html_entity_decode().
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single value is handled like a list with one element
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // NOTE(review): the values are cast with (int), so a hexadecimal
        // notation is not parsed at this point - confirm the expected input.
        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
9947
9948
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * Every entry of self::$BOM (BOM byte sequence => byte length) is compared
     * against the first bytes of the string.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value: the table is only read, and a by-reference loop
        // over the static property would needlessly turn its entries into
        // references.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9974
9975
    /**
     * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>", null, true); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        // only pass the second argument along when the caller supplied one
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
10019
10020
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * If the Unicode-aware replacement fails (e.g. because the input is not
     * valid UTF-8), the ASCII whitespace bytes are removed instead, so that
     * the remaining input is not lost.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without whitespace.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);
        if ($stripped !== null) {
            return $stripped;
        }

        // preg_replace() returns null on error (e.g. malformed UTF-8 with the
        // "u" modifier); casting that to string would silently drop the whole
        // input, so fall back to a byte-wise removal of ASCII whitespace.
        return (string) \preg_replace('/[\x09-\x0D\x20]+/', '', $str);
    }
10041
10042
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * The lookup uses the first available strategy: "mbstring", then "intl"
     * (UTF-8 and non-negative offset only), then the native function for
     * pure ASCII input and finally a case-folded UTF8::strpos() call.
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // since PHP 8 an empty needle is "found" instead of being rejected
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if ($needle === '' && !$is_php8) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // "grapheme_stripos()" can't handle other encodings nor a negative offset
        $intl_usable = $encoding === 'UTF-8'
                       &&
                       $offset >= 0
                       &&
                       self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
10131
10132
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case-insensitive).
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // since PHP 8 an empty needle matches an empty haystack
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            // since PHP 8 an empty needle matches at the very beginning
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stristr($haystack, $needle, $before_needle, $encoding);
            }

            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $is_utf8 = $encoding === 'UTF-8';

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_stristr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first match
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $prefix = $match[1];
        if ($before_needle) {
            return $prefix;
        }

        // the result starts right behind the captured prefix
        $start = (int) self::strlen($prefix, $encoding);

        return self::substr($haystack, $start, null, $encoding);
    }
10237
10238
    /**
     * Get the string length, not the byte-length!
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * The length is determined by the first available strategy: "mbstring",
     * the byte length for CP850 / ASCII, "iconv", "intl" (UTF-8 only), the
     * byte length for pure ASCII input and finally a regex based count.
     *
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if (!$is_utf8) {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
                return @\mb_strlen($str, $encoding);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return @\mb_strlen($str);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        if (
            !$is_utf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // one match per character; nothing matched means the input could not be processed
        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        return $char_count === 0 ? false : $char_count;
    }
10368
10369
    /**
     * Get string length in byte.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of bytes.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
10391
10392
    /**
     * Case-insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both strings are
     * case-folded with UTF8::strtocasefold() and then handed to UTF8::strnatcmp().
     *
     * EXAMPLES: <code>
     * UTF8::strnatcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
10423
10424
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp(); identical strings compare
     * equal right away, otherwise both strings are passed through
     * UTF8::strtonatfold() before the native comparison.
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
10460
10461
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded with UTF8::strtocasefold() and then
     * compared via UTF8::strncmp(), using the same encoding for both steps.
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // Forward the encoding as well: without it strncmp() would always
        // count the first $len characters as UTF-8, even when the strings
        // were folded in another encoding.
        return self::strncmp($folded1, $folded2, $len, $encoding);
    }
10494
10495
    /**
     * Compare the leading characters of two strings.
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>How many characters (from the start) take part in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Everything except UTF-8 is truncated via the encoding-aware helper.
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);

            return self::strcmp($head1, $head2);
        }

        // UTF-8: cut both strings directly with "mb_substr()".
        $head1 = (string) \mb_substr($str1, 0, $len);
        $head2 = (string) \mb_substr($str2, 0, $len);

        return self::strcmp($head1, $head2);
    }
10536
10537
    /**
     * Find the first character of a given set inside a string and
     * return the rest of the string starting at that character.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found.</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        // Nothing to search in, or nothing to search for.
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // Build a character-class pattern from the list and look for the first hit.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        $found = \preg_match($pattern, $haystack, $matches);
        if (!$found) {
            return false;
        }

        // Locate the matched character (byte-wise) and return everything from there on.
        $byte_offset = (int) \strpos($haystack, $matches[0]);

        return \substr($haystack, $byte_offset);
    }
10564
10565
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.
     *                               A negative offset counts from the end of the string.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string (always counted from the start of the haystack).<br> If needle is not found it
     *                   returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty haystack: PHP >= 8 finds an empty needle at position 0,
        // older versions never find anything.
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000) {
                if ($needle === '') {
                    return 0;
                }
            } else {
                return false;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // Re-check after the needle was normalized to a string.
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Translate a negative offset into the equivalent absolute character
        // position first. Otherwise the position computed below would be
        // relative to the cut-off tail instead of the original haystack,
        // which differs from what "mb_strpos()" returns for the same input.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside the remaining haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            // and shift it by the characters that were skipped
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
10738
10739
    /**
     * Find the position of the first occurrence of a substring in a string,
     * using the 8-bit ("CP850") or native byte-wise string function.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle never matches here.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
10771
10772
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive,
     * using the 8-bit ("CP850") or native byte-wise string function.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle never matches here.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \stripos($haystack, $needle, $offset);
    }
10804
10805
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the part of haystack that is returned.
     *                              If true: everything from the beginning of haystack
     *                              up to the last occurrence of needle.
     *                              If false: everything from the last occurrence
     *                              of needle to the end of haystack.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte_encoding && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // Both remaining fallbacks search for the first character of $needle only.
        $needle_first_char = self::substr($needle, 0, 1, $encoding);
        if ($needle_first_char === false) {
            return false;
        }
        $needle = (string) $needle_first_char;

        // Use iconv for the position lookup if it is available,
        // otherwise fall back to our own strrpos() implementation.
        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        if ($before_needle) {
            return self::substr($haystack, 0, $last_pos, $encoding);
        }

        return self::substr($haystack, $last_pos, null, $encoding);
    }
10935
10936
    /**
     * Reverse the order of the characters in a string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Emoji are encoded before the reversal and decoded again afterwards.
        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
10993
10994
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the part of haystack that is returned.
     *                              If true: everything from the beginning of haystack
     *                              up to the last occurrence of needle.
     *                              If false: everything from the last occurrence
     *                              of needle to the end of haystack.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // The fallback searches for the first character of $needle only.
        $needle_first_char = self::substr($needle, 0, 1, $encoding);
        if ($needle_first_char === false) {
            return false;
        }
        $needle = (string) $needle_first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
11074
11075
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>A non-negative int is converted
     *                               into a character via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // Empty haystack: PHP >= 8 finds an empty needle at position 0,
        // older versions never find anything.
        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // Re-check after the needle was normalized to a string.
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $can_use_grapheme = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            && $offset >= 0 // grapheme_strripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($can_use_grapheme) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Case-fold both strings and delegate to the case-sensitive search.
        $haystack = self::strtocasefold($haystack, true, false, $encoding);
        $needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($haystack, $needle, $offset, $encoding, $clean_utf8);
    }
11206
11207
    /**
     * Finds position of last occurrence of a string within another, case-insensitive,
     * using the 8-bit ("CP850") or native byte-wise string function.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle never matches here.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
11241
11242
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // PHP < 8: nothing can be found in an empty haystack, whatever the needle is
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if (\is_int($needle) && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // only PHP >= 8 reaches this point: an empty needle matches at position 0
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset > 0) {
            // search only behind the offset, the skipped characters are added again at the end
            $haystack = (string) self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            // A negative offset limits where a match may *start*, not where it has to end
            // (native: strrpos('...789c', '9c', -2) still finds the needle), so the haystack
            // is cut "needle length" characters behind the offset instead of exactly at it.
            $cut_at = $offset + (int) self::strlen($needle);
            if ($cut_at < 0) {
                $haystack = (string) self::substr($haystack, 0, $cut_at);
            }
            $offset = 0;
        }

        // byte position of the last match ...
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // ... converted into a character position by counting the characters in front of it
        return $offset + (int) self::strlen($str_tmp);
    }
11401
11402
    /**
     * Byte-wise search for the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>The string being searched.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] <p>Passed through unchanged as the offset argument of the underlying
     *                         search function.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The position of the last occurrence of needle in the haystack,
     *                   or false if it is not found or if one of the strings is empty.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // an empty needle or haystack is always reported as "not found"
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // when function overloading is flagged, go through "mb_" with a single-byte charset
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strrpos($haystack, $needle, $offset);
    }
11436
11437
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the part of the string that is examined.</p>
     * @param int|null $length   [optional] <p>Length of the part of the string that is examined.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Length of the leading segment, 0 if the string or the mask is empty or nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the subject to the requested window before matching
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // the mask becomes a character class that has to match from the very first character on
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11484
11485
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first occurrence
     *                              of the needle is returned (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            // only PHP >= 8 finds an empty needle inside of an empty haystack
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            // PHP >= 8 matches the empty needle at the very beginning
            return $is_php8 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $intl_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $found = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $prefix = $found[1];

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
11618
11619
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>The string that is searched.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Passed through to the underlying function: if true, the part of the haystack
     *                              in front of the first occurrence of needle is returned,
     *                              otherwise the part starting at the first occurrence of needle.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found or if one of the strings is empty.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // an empty needle or haystack is always reported as "not found"
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // when function overloading is flagged, go through "mb_" with a single-byte charset
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850') // 8-BIT
            : \strstr($haystack, $needle, $before_needle);
    }
11660
11661
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // replace the special case folding chars first, the plain case conversion follows below
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // without a language and with UTF-8 the "mb_" functions can be called directly
        $use_plain_mb = $lang === null && $encoding === 'UTF-8';

        if ($lower) {
            return $use_plain_mb
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $use_plain_mb
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11717
11718
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // the common case: no special language and UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // the list of transliterator ids is loaded only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language independent transliterator
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11799
11800
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length (the
     *                                                   string is passed through fixStrCaseHelper() first)</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // the common case: no special language and UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of transliterator ids is loaded only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    /**
                     * @psalm-suppress ImpureFunctionCall - is is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the language independent transliterator
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11881
11882
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from, or an array of replacement pairs
     *                              if "to" is left empty.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into replacement pairs
     *
     * @psalm-pure
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // cut the longer list, so that every "from" entry has exactly one "to" entry
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep $from untouched until the result is checked, so that the error message can still show it
            $pairs = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        // only reachable with an empty "to": the "from" string is removed from the input
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11949
11950
    /**
     * Return the width of a string.
     *
     * INFO: in the fallback implementation every wide (e.g. East Asian) character counts as two columns,
     * every other character as one.
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the regex below works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and let preg_replace() count them
        $wide_count = 0;
        $narrow_chars = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // two columns per wide character + one column for each remaining character
        return (2 * $wide_count) + (int) self::strlen($narrow_chars);
    }
12009
12010
    /**
12011
     * Get part of a string.
12012
     *
12013
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
12014
     *
12015
     * @see http://php.net/manual/en/function.mb-substr.php
12016
     *
12017
     * @param string   $str        <p>The string being checked.</p>
12018
     * @param int      $offset     <p>The first position used in str.</p>
12019
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
12020
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12021
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12022
     *
12023
     * @psalm-pure
12024
     *
12025
     * @return false|string
12026
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
12027
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
12028
     *                      characters long, <b>FALSE</b> will be returned.
12029
     */
12030
    public static function substr(
12031
        string $str,
12032
        int $offset = 0,
12033
        int $length = null,
12034
        string $encoding = 'UTF-8',
12035
        bool $clean_utf8 = false
12036
    ) {
12037
        // empty string
12038 172
        if ($str === '' || $length === 0) {
12039 8
            return '';
12040
        }
12041
12042 168
        if ($clean_utf8) {
12043
            // iconv and mbstring are not tolerant to invalid encoding
12044
            // further, their behaviour is inconsistent with that of PHP's substr
12045 2
            $str = self::clean($str);
12046
        }
12047
12048
        // whole string
12049 168
        if (!$offset && $length === null) {
12050 7
            return $str;
12051
        }
12052
12053 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
12054 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
12055
        }
12056
12057
        //
12058
        // fallback via mbstring
12059
        //
12060
12061 163
        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
12062 161
            if ($length === null) {
12063 64
                return \mb_substr($str, $offset);
12064
            }
12065
12066 102
            return \mb_substr($str, $offset, $length);
12067
        }
12068
12069
        //
12070
        // fallback for binary || ascii only
12071
        //
12072
12073
        if (
12074 4
            $encoding === 'CP850'
12075
            ||
12076 4
            $encoding === 'ASCII'
12077
        ) {
12078
            if ($length === null) {
12079
                return \substr($str, $offset);
12080
            }
12081
12082
            return \substr($str, $offset, $length);
12083
        }
12084
12085
        // otherwise we need the string-length
12086 4
        $str_length = 0;
12087 4
        if ($offset || $length === null) {
12088 4
            $str_length = self::strlen($str, $encoding);
12089
        }
12090
12091
        // e.g.: invalid chars + mbstring not installed
12092 4
        if ($str_length === false) {
12093
            return false;
12094
        }
12095
12096
        // empty string
12097 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
12098
            return '';
12099
        }
12100
12101
        // impossible
12102 4
        if ($offset && $offset > $str_length) {
12103
            return '';
12104
        }
12105
12106 4
        $length = $length ?? (int) $str_length;
12107
12108
        if (
12109 4
            $encoding !== 'UTF-8'
12110
            &&
12111 4
            self::$SUPPORT['mbstring'] === false
12112
        ) {
12113
            /**
12114
             * @psalm-suppress ImpureFunctionCall - it is only a warning
12115
             */
12116 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
12117
        }
12118
12119
        //
12120
        // fallback via intl
12121
        //
12122
12123
        if (
12124 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
12125
            &&
12126 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
12127
            &&
12128 4
            self::$SUPPORT['intl'] === true
12129
        ) {
12130
            $return_tmp = \grapheme_substr($str, $offset, $length);
12131
            if ($return_tmp !== false) {
12132
                return $return_tmp;
12133
            }
12134
        }
12135
12136
        //
12137
        // fallback via iconv
12138
        //
12139
12140
        if (
12141 4
            $length >= 0 // "iconv_substr()" can't handle negative length
12142
            &&
12143 4
            self::$SUPPORT['iconv'] === true
12144
        ) {
12145
            $return_tmp = \iconv_substr($str, $offset, $length);
12146
            if ($return_tmp !== false) {
12147
                return $return_tmp;
12148
            }
12149
        }
12150
12151
        //
12152
        // fallback for ascii only
12153
        //
12154
12155 4
        if (ASCII::is_ascii($str)) {
12156
            return \substr($str, $offset, $length);
12157
        }
12158
12159
        //
12160
        // fallback via vanilla php
12161
        //
12162
12163
        // split to array, and remove invalid characters
12164 4
        $array = self::str_split($str);
12165
12166
        // extract relevant part, and join to make string again
12167 4
        return \implode('', \array_slice($array, $offset, $length));
12168
    }
12169
12170
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. If null, everything from
     *                                     $offset to the end of $str1 is compared.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // Only slice when a window was actually requested.
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // $str2 is cut to the same number of characters as the extracted part of $str1
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // The window length has to be measured in the same encoding that is used
                // for slicing, otherwise multi-byte non-UTF-8 input is cut at the wrong place.
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12231
12232
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A negative value is forwarded to "mb_substr()" and therefore
     *                             counts from the end of the haystack. 0 always yields 0 matches.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if the needle is empty or the
     *                   length of the haystack could not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in, or an empty search window
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Restrict the haystack to the requested window. "$length === 0" was handled above,
        // so every remaining non-null length (positive or negative) must be applied,
        // even when the offset is 0.
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: count the non-overlapping literal matches of the needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12329
12330
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The byte offset where to start counting.</p>
     * @param int|null $length   [optional] <p>
     *                           The maximum number of bytes after the specified offset to search
     *                           for the substring.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the needle substring occurs in the haystack string;
     *                   0 if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // Without "mbstring.func_overload" the native function can be used directly.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // With func_overload: cut the requested window out of the haystack first ...
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            // output from "substr_count()" have changed in PHP 7.1
            // NOTE(review): 71000 is the id of PHP 7.10.0, PHP 7.1.0 would be 70100 - confirm the intent.
            if (
                \PHP_VERSION_ID < 71000
                &&
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
            ) {
                return false;
            }

            /** @var false|string $window - needed for PhpStan (stubs error) */
            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // ... "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
12411
12412
    /**
     * Counts how often $substring occurs in $str.
     *
     * The comparison is case-sensitive unless $case_sensitive is set to false.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences; 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        // fast path: plain UTF-8, no encoding argument needed for the "mb_" calls
        if ($encoding === 'UTF-8') {
            if (!$case_sensitive) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$case_sensitive) {
            // fold both operands the same way, so that they can be compared byte-wise
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
12459
12460
    /**
     * Strips the prefix $needle from the start of $haystack, ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the needle has
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12492
12493
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first byte position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length (in bytes) of the returned string;
     *                         null means "up to the end of the string".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. The result of the underlying "substr()" /
     *                      "mb_substr()" call is returned as-is, so a failure value of that
     *                      function (e.g. <b>FALSE</b> from "substr()" on PHP &lt; 8.0 if the offset
     *                      is behind the end of the string) is passed through.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // Omit the length instead of passing a hard-coded 32-bit maximum,
        // which would silently truncate results longer than 2 GiB.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
12526
12527
    /**
     * Strips the suffix $needle from the end of $haystack, ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it
     *                does not end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything except the last "length of needle" characters
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
12559
12560
    /**
     * Strips the prefix $needle from the start of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the needle has
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12592
12593
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and $str is a string, the replacing
     *                                     ends at the end of the string. If it is not given and $str is an array,
     *                                     a length of 0 is used for every element, i.e. the replacement is
     *                                     inserted at the given offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: non-integer entries fall back to the number of input strings
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, element by element; the encoding has to be handed over as well,
            // otherwise every element would be processed with the default "UTF-8"
            return \array_map(
                [self::class, 'substr_replace'],
                $str,
                $replacement,
                $offset,
                $length,
                \array_fill(0, $num, $encoding)
            );
        }

        // a replacement array for a single string: only the first entry is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing length into a positive number of characters
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // the replaced part must not reach behind the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into arrays of single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12748
12749
    /**
     * Strips the suffix $needle from the end of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it
     *                does not end with the needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check: do the last bytes of the haystack equal the needle?
        $tail = \substr($haystack, -\strlen($needle));
        if ($tail !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep_length, $encoding);
    }
12798
12799
    /**
     * Returns the string with the case of every character swapped.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Byte-wise XOR of the three variants: where lower and upper are identical they cancel
        // each other out and the original byte remains; where they differ, the original byte
        // equals one of them, so the other one remains.
        // NOTE(review): this presumably relies on both variants having the same byte length
        // as $str - confirm for characters whose case mapping changes the length.
        return (string) ($lower ^ $upper ^ $str);
    }
12831
12832
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed if the "mbstring" or the "iconv" extension is not loaded,
     * but one of its functions ("mb_strlen" / "iconv") exists nevertheless.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12859
12860
    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs replaced by spaces.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indentation = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indentation, $str);
    }
12880
12881
    /**
12882
     * Converts the first character of each word in the string to uppercase
12883
     * and all other chars to lowercase.
12884
     *
12885
     * @param string      $str                           <p>The input string.</p>
12886
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12887
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12888
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12889
     *                                                   tr</p>
12890
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12891
     *                                                   -> ß</p>
12892
     *
12893
     * @psalm-pure
12894
     *
12895
     * @return string
12896
     *                <p>A string with all characters of $str being title-cased.</p>
12897
     */
12898
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // Language-specific casing or length preservation is delegated to str_titleize().
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        // Plain case: let mbstring do the title-casing.
        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12935
12936
    /**
12937
     * alias for "UTF8::to_ascii()"
12938
     *
12939
     * @param string $str
12940
     * @param string $subst_chr
12941
     * @param bool   $strict
12942
     *
12943
     * @psalm-pure
12944
     *
12945
     * @return string
12946
     *
12947
     * @see        UTF8::to_ascii()
12948
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
12949
     */
12950
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        // Deprecated camelCase alias: forwards all arguments unchanged to to_ascii().
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12957
12958
    /**
12959
     * alias for "UTF8::to_iso8859()"
12960
     *
12961
     * @param string|string[] $str
12962
     *
12963
     * @psalm-pure
12964
     *
12965
     * @return string|string[]
12966
     *
12967
     * @see        UTF8::to_iso8859()
12968
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12969
     */
12970
    public static function toIso8859($str)
    {
        // Deprecated camelCase alias: forwards to to_iso8859().
        $converted = self::to_iso8859($str);

        return $converted;
    }
12974
12975
    /**
12976
     * alias for "UTF8::to_latin1()"
12977
     *
12978
     * @param string|string[] $str
12979
     *
12980
     * @psalm-pure
12981
     *
12982
     * @return string|string[]
12983
     *
12984
     * @see        UTF8::to_iso8859()
12985
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12986
     */
12987
    public static function toLatin1($str)
    {
        // Deprecated camelCase alias: forwards directly to to_iso8859().
        $converted = self::to_iso8859($str);

        return $converted;
    }
12991
12992
    /**
12993
     * alias for "UTF8::to_utf8()"
12994
     *
12995
     * @param string|string[] $str
12996
     *
12997
     * @psalm-pure
12998
     *
12999
     * @return string|string[]
13000
     *
13001
     * @see        UTF8::to_utf8()
13002
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
13003
     */
13004
    public static function toUTF8($str)
    {
        // Deprecated camelCase alias: forwards to to_utf8() with its default options.
        $converted = self::to_utf8($str);

        return $converted;
    }
13008
13009
    /**
13010
     * Convert a string into ASCII.
13011
     *
13012
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
13013
     *
13014
     * @param string $str     <p>The input string.</p>
13015
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
13016
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
13017
     *                        performance</p>
13018
     *
13019
     * @psalm-pure
13020
     *
13021
     * @return string
13022
     */
13023
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // The actual transliteration is done by the ASCII helper class.
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
13030
13031
    /**
13032
     * @param bool|int|string $str
13033
     *
13034
     * @phpstan-param bool|int|numeric-string $str
13035
     *
13036
     * @psalm-pure
13037
     *
13038
     * @return bool
13039
     */
13040
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // Exact match first, then a case-insensitive match ("Yes", "OFF", ...).
        $known = $keywords[$value] ?? $keywords[\strtolower($value)] ?? null;
        if ($known !== null) {
            return $known;
        }

        // Any other numeric string is true only when strictly positive.
        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        // Everything else is true unless it trims down to '' or '0'.
        $trimmed = \trim($value);

        return $trimmed !== '' && $trimmed !== '0';
    }
13076
13077
    /**
13078
     * Convert given string to safe filename (and keep string case).
13079
     *
13080
     * @param string $str
13081
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
13082
     *                                  simply replaced with hyphen.
13083
     * @param string $fallback_char
13084
     *
13085
     * @psalm-pure
13086
     *
13087
     * @return string
13088
     */
13089
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // Filename sanitising is implemented by the ASCII helper class.
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
13100
13101
    /**
13102
     * Convert a string into "ISO-8859"-encoding (Latin-1).
13103
     *
13104
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
13105
     *
13106
     * @param string|string[] $str
13107
     *
13108
     * @psalm-pure
13109
     *
13110
     * @return string|string[]
13111
     */
13112
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            // Convert every element in place (recursing for nested arrays), keeping the keys.
            foreach ($str as $key => $item) {
                $str[$key] = self::to_iso8859($item);
            }

            return $str;
        }

        $text = (string) $str;

        // Nothing to decode for an empty string.
        return $text === '' ? '' : self::utf8_decode($text);
    }
13129
13130
    /**
13131
     * alias for "UTF8::to_iso8859()"
13132
     *
13133
     * @param string|string[] $str
13134
     *
13135
     * @psalm-pure
13136
     *
13137
     * @return string|string[]
13138
     *
13139
     * @see        UTF8::to_iso8859()
13140
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
13141
     */
13142
    public static function to_latin1($str)
    {
        // Deprecated alias: "Latin-1" is handled by to_iso8859().
        $converted = self::to_iso8859($str);

        return $converted;
    }
13146
13147
    /**
13148
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13149
     *
13150
     * <ul>
13151
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
13152
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13153
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
13154
     * case.</li>
13155
     * </ul>
13156
     *
13157
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
13158
     *
13159
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
13160
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13161
     *
13162
     * @psalm-pure
13163
     *
13164
     * @return string|string[]
13165
     *                         <p>The UTF-8 encoded string</p>
13166
     *
13167
     * @template TToUtf8
13168
     * @phpstan-param TToUtf8 $str
13169
     * @phpstan-return TToUtf8
13170
     *
13171
     * @noinspection SuspiciousBinaryOperationInspection
13172
     */
13173
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        // Array input: convert each element individually and keep the keys.
        foreach ($str as $key => $item) {
            $str[$key] = self::to_utf8_string($item, $decode_html_entity_to_utf8);
        }

        return $str;
    }
13188
13189
    /**
13190
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13191
     *
13192
     * <ul>
13193
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
13194
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13195
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
13196
     * case.</li>
13197
     * </ul>
13198
     *
13199
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
13200
     *
13201
     * @param string $str                        <p>Any string.</p>
13202
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13203
     *
13204
     * @psalm-pure
13205
     *
13206
     * @return string
13207
     *                <p>The UTF-8 encoded string</p>
13208
     *
13209
     * @noinspection SuspiciousBinaryOperationInspection
13210
     */
13211
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';

        // Pass 1: walk the input byte-wise, keep what already looks like UTF-8
        // and hand every other high byte to the conversion helper.
        for ($pos = 0; $pos < $length; ++$pos) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                // 0x80-0xBF outside of a sequence is a stray continuation byte
                // and needs conversion; everything below is kept as-is.
                $result .= ($lead & "\xC0") === "\x80"
                    ? self::to_utf8_convert_helper($lead)
                    : $lead;

                continue;
            }

            // How many continuation bytes the lead byte announces
            // (0 = doesn't look like a UTF-8 lead byte at all).
            if ($lead <= "\xDF") {
                $trail_count = 1;
            } elseif ($lead <= "\xEF") {
                $trail_count = 2;
            } elseif ($lead <= "\xF7") {
                $trail_count = 3;
            } else {
                $trail_count = 0;
            }

            $sequence = $lead;
            $looks_like_utf8 = $trail_count > 0;
            for ($offset = 1; $looks_like_utf8 && $offset <= $trail_count; ++$offset) {
                // Bytes past the end of the input are treated as "\x00", i.e. never a valid continuation.
                $trail = $pos + $offset < $length ? $str[$pos + $offset] : "\x00";

                if ($trail >= "\x80" && $trail <= "\xBF") {
                    $sequence .= $trail;
                } else {
                    $looks_like_utf8 = false;
                }
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already
                $result .= $sequence;
                $pos += $trail_count;
            } else {
                // not valid UTF8 - convert only the lead byte, the following bytes get their own turn
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // Pass 2: decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $code_point = ((int) \hexdec($matches[1]) << 10)
                                  + (int) \hexdec($matches[2])
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // NOTE(review): self::chr() is not visible here. If it returns a UTF-8
                    // encoded character rather than a raw byte, this branch would
                    // double-encode U+0080..U+009F - confirm against self::chr().
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // preg_replace_callback() signals a PCRE failure with null.
        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
13322
13323
    /**
13324
     * Returns the given string as an integer, or null if the string isn't numeric.
13325
     *
13326
     * @param string $str
13327
     *
13328
     * @psalm-pure
13329
     *
13330
     * @return int|null
13331
     *                  <p>null if the string isn't numeric</p>
13332
     */
13333
    public static function to_int(string $str)
    {
        // Numeric strings are cast (truncating any fractional part); anything else yields null.
        return \is_numeric($str) ? (int) $str : null;
    }
13341
13342
    /**
13343
     * Returns the given input as string, or null if the input isn't int|float|string
13344
     * and do not implement the "__toString()" method.
13345
     *
13346
     * @param float|int|object|string|null $input
13347
     *
13348
     * @psalm-pure
13349
     *
13350
     * @return string|null
13351
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
13352
     */
13353
    public static function to_string($input)
    {
        // Strings, integers and floats are cast directly.
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // Objects qualify only when they can stringify themselves.
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource and objects without "__toString()".
        return null;
    }
13387
13388
    /**
13389
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
13390
     *
13391
     * INFO: This is slower than "trim()"
13392
     *
13393
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
13394
     * but the check for ASCII (7-Bit) costs more time than we can save here.
13395
     *
13396
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
13397
     *
13398
     * @param string      $str   <p>The string to be trimmed</p>
13399
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
13400
     *
13401
     * @psalm-pure
13402
     *
13403
     * @return string
13404
     *                <p>The trimmed string.</p>
13405
     */
13406
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // NOTE: the patterns below are assembled by concatenation on purpose;
        // the "${var}" interpolation form is deprecated as of PHP 8.2.

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // strip the given characters from both ends
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
            } else {
                // default: strip whitespace from both ends
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: same pattern, applied via regex_replace()
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
13434
13435
    /**
13436
     * Makes string's first char uppercase.
13437
     *
13438
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
13439
     *
13440
     * @param string      $str                           <p>The input string.</p>
13441
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
13442
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
13443
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
13444
     *                                                   tr</p>
13445
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
13446
     *                                                   -> ß</p>
13447
     *
13448
     * @psalm-pure
13449
     *
13450
     * @return string
13451
     *                <p>The resulting string with its first char in uppercase.</p>
13452
     */
13453
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // Plain mbstring upper-casing is enough unless language rules
        // or length preservation were requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            if ($use_mb_functions) {
                return \mb_strtoupper($first_char) . $remainder;
            }
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $remainder = (string) self::substr($str, 1, null, $encoding);

            if ($use_mb_functions) {
                $first_char = (string) \mb_substr($str, 0, 1, $encoding);

                return \mb_strtoupper($first_char, $encoding) . $remainder;
            }

            $first_char = (string) self::substr($str, 0, 1, $encoding);
        }

        // Special cases go through the library's own strtoupper().
        $upper_first_char = self::strtoupper(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $upper_first_char . $remainder;
    }
13511
13512
    /**
13513
     * alias for "UTF8::ucfirst()"
13514
     *
13515
     * @param string $str
13516
     * @param string $encoding
13517
     * @param bool   $clean_utf8
13518
     *
13519
     * @psalm-pure
13520
     *
13521
     * @return string
13522
     *
13523
     * @see        UTF8::ucfirst()
13524
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
13525
     */
13526
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Deprecated alias: upper-cases only the first character, exactly like ucfirst().
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
13533
13534
    /**
13535
     * Uppercase for all words in the string.
13536
     *
13537
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
13538
     *
13539
     * @param string   $str        <p>The input string.</p>
13540
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
13541
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
13542
     *                             word.</p>
13543
     * @param string   $encoding   [optional] <p>Set the charset.</p>
13544
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
13545
     *
13546
     * @psalm-pure
13547
     *
13548
     * @return string
13549
     */
13550
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Compare against '' explicitly: a loose "!$str" check would also
        // treat the string "0" as empty and wrongly return ''.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // The native ucwords() is only usable when neither extra word characters
        // nor exceptions were given (strict check, so that "0" counts as given).
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // Skip only truly empty parts; a part consisting of "0" must be kept.
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                // excluded words are appended unchanged
                $words_str .= $word;
            }
        }

        return $words_str;
    }
13602
13603
    /**
13604
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
13605
     *
13606
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
13607
     *
13608
     * e.g:
13609
     * 'test+test'                     => 'test test'
13610
     * 'D&#252;sseldorf'               => 'Düsseldorf'
13611
     * 'D%FCsseldorf'                  => 'Düsseldorf'
13612
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
13613
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
13614
     * 'Düsseldorf'                   => 'Düsseldorf'
13615
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
13616
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
13617
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
13618
     *
13619
     * @param string $str          <p>The input string.</p>
13620
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
13621
     *
13622
     * @psalm-pure
13623
     *
13624
     * @return string
13625
     */
13626
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Fast path: without any of the markers "&", "%", "+" or "\u"
        // there is nothing to decode.
        $has_encoded_parts = \strpos($str, '&') !== false
            || \strpos($str, '%') !== false
            || \strpos($str, '+') !== false
            || \strpos($str, '\u') !== false;

        if (!$has_encoded_parts) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // Decode once; with $multi_decode keep decoding until the string stops changing.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $previous !== $str);

        return $str;
    }
13678
13679
    /**
13680
     * Return a array with "urlencoded"-win1252 -> UTF-8
13681
     *
13682
     * @psalm-pure
13683
     *
13684
     * @return string[]
13685
     *
13686
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
13687
     */
13688
    public static function urldecode_fix_win1252_chars(): array
13689
    {
13690
        return [
13691 2
            '%20' => ' ',
13692
            '%21' => '!',
13693
            '%22' => '"',
13694
            '%23' => '#',
13695
            '%24' => '$',
13696
            '%25' => '%',
13697
            '%26' => '&',
13698
            '%27' => "'",
13699
            '%28' => '(',
13700
            '%29' => ')',
13701
            '%2A' => '*',
13702
            '%2B' => '+',
13703
            '%2C' => ',',
13704
            '%2D' => '-',
13705
            '%2E' => '.',
13706
            '%2F' => '/',
13707
            '%30' => '0',
13708
            '%31' => '1',
13709
            '%32' => '2',
13710
            '%33' => '3',
13711
            '%34' => '4',
13712
            '%35' => '5',
13713
            '%36' => '6',
13714
            '%37' => '7',
13715
            '%38' => '8',
13716
            '%39' => '9',
13717
            '%3A' => ':',
13718
            '%3B' => ';',
13719
            '%3C' => '<',
13720
            '%3D' => '=',
13721
            '%3E' => '>',
13722
            '%3F' => '?',
13723
            '%40' => '@',
13724
            '%41' => 'A',
13725
            '%42' => 'B',
13726
            '%43' => 'C',
13727
            '%44' => 'D',
13728
            '%45' => 'E',
13729
            '%46' => 'F',
13730
            '%47' => 'G',
13731
            '%48' => 'H',
13732
            '%49' => 'I',
13733
            '%4A' => 'J',
13734
            '%4B' => 'K',
13735
            '%4C' => 'L',
13736
            '%4D' => 'M',
13737
            '%4E' => 'N',
13738
            '%4F' => 'O',
13739
            '%50' => 'P',
13740
            '%51' => 'Q',
13741
            '%52' => 'R',
13742
            '%53' => 'S',
13743
            '%54' => 'T',
13744
            '%55' => 'U',
13745
            '%56' => 'V',
13746
            '%57' => 'W',
13747
            '%58' => 'X',
13748
            '%59' => 'Y',
13749
            '%5A' => 'Z',
13750
            '%5B' => '[',
13751
            '%5C' => '\\',
13752
            '%5D' => ']',
13753
            '%5E' => '^',
13754
            '%5F' => '_',
13755
            '%60' => '`',
13756
            '%61' => 'a',
13757
            '%62' => 'b',
13758
            '%63' => 'c',
13759
            '%64' => 'd',
13760
            '%65' => 'e',
13761
            '%66' => 'f',
13762
            '%67' => 'g',
13763
            '%68' => 'h',
13764
            '%69' => 'i',
13765
            '%6A' => 'j',
13766
            '%6B' => 'k',
13767
            '%6C' => 'l',
13768
            '%6D' => 'm',
13769
            '%6E' => 'n',
13770
            '%6F' => 'o',
13771
            '%70' => 'p',
13772
            '%71' => 'q',
13773
            '%72' => 'r',
13774
            '%73' => 's',
13775
            '%74' => 't',
13776
            '%75' => 'u',
13777
            '%76' => 'v',
13778
            '%77' => 'w',
13779
            '%78' => 'x',
13780
            '%79' => 'y',
13781
            '%7A' => 'z',
13782
            '%7B' => '{',
13783
            '%7C' => '|',
13784
            '%7D' => '}',
13785
            '%7E' => '~',
13786
            '%7F' => '',
13787
            '%80' => '`',
13788
            '%81' => '',
13789
            '%82' => '‚',
13790
            '%83' => 'ƒ',
13791
            '%84' => '„',
13792
            '%85' => '…',
13793
            '%86' => '†',
13794
            '%87' => '‡',
13795
            '%88' => 'ˆ',
13796
            '%89' => '‰',
13797
            '%8A' => 'Š',
13798
            '%8B' => '‹',
13799
            '%8C' => 'Œ',
13800
            '%8D' => '',
13801
            '%8E' => 'Ž',
13802
            '%8F' => '',
13803
            '%90' => '',
13804
            '%91' => '‘',
13805
            '%92' => '’',
13806
            '%93' => '“',
13807
            '%94' => '”',
13808
            '%95' => '•',
13809
            '%96' => '–',
13810
            '%97' => '—',
13811
            '%98' => '˜',
13812
            '%99' => '™',
13813
            '%9A' => 'š',
13814
            '%9B' => '›',
13815
            '%9C' => 'œ',
13816
            '%9D' => '',
13817
            '%9E' => 'ž',
13818
            '%9F' => 'Ÿ',
13819
            '%A0' => '',
13820
            '%A1' => '¡',
13821
            '%A2' => '¢',
13822
            '%A3' => '£',
13823
            '%A4' => '¤',
13824
            '%A5' => '¥',
13825
            '%A6' => '¦',
13826
            '%A7' => '§',
13827
            '%A8' => '¨',
13828
            '%A9' => '©',
13829
            '%AA' => 'ª',
13830
            '%AB' => '«',
13831
            '%AC' => '¬',
13832
            '%AD' => '',
13833
            '%AE' => '®',
13834
            '%AF' => '¯',
13835
            '%B0' => '°',
13836
            '%B1' => '±',
13837
            '%B2' => '²',
13838
            '%B3' => '³',
13839
            '%B4' => '´',
13840
            '%B5' => 'µ',
13841
            '%B6' => '¶',
13842
            '%B7' => '·',
13843
            '%B8' => '¸',
13844
            '%B9' => '¹',
13845
            '%BA' => 'º',
13846
            '%BB' => '»',
13847
            '%BC' => '¼',
13848
            '%BD' => '½',
13849
            '%BE' => '¾',
13850
            '%BF' => '¿',
13851
            '%C0' => 'À',
13852
            '%C1' => 'Á',
13853
            '%C2' => 'Â',
13854
            '%C3' => 'Ã',
13855
            '%C4' => 'Ä',
13856
            '%C5' => 'Å',
13857
            '%C6' => 'Æ',
13858
            '%C7' => 'Ç',
13859
            '%C8' => 'È',
13860
            '%C9' => 'É',
13861
            '%CA' => 'Ê',
13862
            '%CB' => 'Ë',
13863
            '%CC' => 'Ì',
13864
            '%CD' => 'Í',
13865
            '%CE' => 'Î',
13866
            '%CF' => 'Ï',
13867
            '%D0' => 'Ð',
13868
            '%D1' => 'Ñ',
13869
            '%D2' => 'Ò',
13870
            '%D3' => 'Ó',
13871
            '%D4' => 'Ô',
13872
            '%D5' => 'Õ',
13873
            '%D6' => 'Ö',
13874
            '%D7' => '×',
13875
            '%D8' => 'Ø',
13876
            '%D9' => 'Ù',
13877
            '%DA' => 'Ú',
13878
            '%DB' => 'Û',
13879
            '%DC' => 'Ü',
13880
            '%DD' => 'Ý',
13881
            '%DE' => 'Þ',
13882
            '%DF' => 'ß',
13883
            '%E0' => 'à',
13884
            '%E1' => 'á',
13885
            '%E2' => 'â',
13886
            '%E3' => 'ã',
13887
            '%E4' => 'ä',
13888
            '%E5' => 'å',
13889
            '%E6' => 'æ',
13890
            '%E7' => 'ç',
13891
            '%E8' => 'è',
13892
            '%E9' => 'é',
13893
            '%EA' => 'ê',
13894
            '%EB' => 'ë',
13895
            '%EC' => 'ì',
13896
            '%ED' => 'í',
13897
            '%EE' => 'î',
13898
            '%EF' => 'ï',
13899
            '%F0' => 'ð',
13900
            '%F1' => 'ñ',
13901
            '%F2' => 'ò',
13902
            '%F3' => 'ó',
13903
            '%F4' => 'ô',
13904
            '%F5' => 'õ',
13905
            '%F6' => 'ö',
13906
            '%F7' => '÷',
13907
            '%F8' => 'ø',
13908
            '%F9' => 'ù',
13909
            '%FA' => 'ú',
13910
            '%FB' => 'û',
13911
            '%FC' => 'ü',
13912
            '%FD' => 'ý',
13913
            '%FE' => 'þ',
13914
            '%FF' => 'ÿ',
13915
        ];
13916
    }
13917
13918
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * The input is processed byte-wise and rewritten in place: every UTF-8 sequence collapses into
     * exactly one output byte. A 2-byte sequence whose code point is below 256 is mapped through the
     * "chr" data table; every other multi-byte sequence (3 or 4 bytes, 2-byte code points >= 256, or a
     * 2-byte sequence that is cut off at the end of the input) is replaced by "?". All remaining bytes
     * are copied unchanged.
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true, the untouched input is returned whenever the decoded result is
     *                                not shorter than the input, both measured with UTF8::strlen().
     *                                </p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        // $i is the read position, $j the write position; $j never overtakes $i,
        // so overwriting $str in place cannot clobber unread input
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte selects the sequence length
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // lead byte of a 2-byte sequence without its continuation byte:
                        // do not read past the end of the string
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one extra byte, then share the 3-byte handling
                    ++$i;

                // no break

                case "\xE0":
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13994
13995
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Thin wrapper around the native utf8_encode() that returns an empty string
     * for empty input and whenever the underlying call yields false.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // encode exactly once and keep the result in its own variable, so the
        // string-typed parameter is never overwritten with a "false|string" value
        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
14023
14024
    /**
     * Legacy alias that forwards to UTF8::fix_simple_utf8().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever UTF8::fix_simple_utf8() returns for the given input.</p>
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
14039
14040
    /**
     * Returns the table of all utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  The whitespace characters as values, keyed by the type of whitespace
     *                  as defined in the URL above
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
14055
14056
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. If the string contains more than
     * $limit words, it is cut after the last allowed word, trailing whitespace is removed
     * and $str_add_on is appended; otherwise the string is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str is empty or $limit is smaller than 1.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace, followed by 1..$limit words including the whitespace behind them
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // nothing matched -> hand the input back untouched
        if (!isset($matches[0])) {
            return $str;
        }

        // the match already spans the whole input -> nothing was cut off
        $head = $matches[0];
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
14090
14091
    /**
     * Wraps a string to a given number of characters
     *
     * The wrapping itself is delegated to the native wordwrap(), which is run on a
     * single-byte "mask" of the input ("?" per character, " " per space, "#" per existing
     * break). The break positions found in the mask are then replayed on the real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column,
     *                or an empty string if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real characters (an already existing $break counts as one element),
        // $mask mirrors it element by element with single-byte placeholders
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;  // position in $chars
        $mask_index = -1; // position in $mask
        $break_pos = -1;  // position of the current "#" in $mask
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy everything in front of the current break
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the break replaces an existing break or a space: drop that element
            if (
                $break === $chars[$char_index]
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
14185
14186
    /**
     * Splits the string by "$delimiter" (newlines by default), wraps every part on its own
     * via UTF8::wordwrap() and joins the parts again.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     If null, the string is split on "\r\n", "\r" or "\n" and joined
     *                                     with "\n"; otherwise the given delimiter is used for both.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        // a failed split yields no lines at all
        $wrapped_lines = [];
        foreach (($lines !== false ? $lines : []) as $line) {
            $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
        }

        $final_break = $add_final_break ? $break : '';

        return \implode($delimiter ?? "\n", $wrapped_lines) . $final_break;
    }
14239
14240
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
14252
14253
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * If PCRE has UTF-8 support, the check is delegated to a "/u" regex match; otherwise the
     * bytes are validated one by one with a small state machine.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary input that is detected as UTF-16 or UTF-32 is never UTF-8
        if (
            $strict
            &&
            self::is_binary($str, true)
            &&
            (
                self::is_utf16($str, false) !== false
                ||
                self::is_utf32($str, false) !== false
            )
        ) {
            return false;
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        $pending = 0; // continuation octets still expected for the current sequence
        $code_point = 0; // Unicode code point assembled so far
        $sequence_length = 1; // total number of octets announced by the current lead octet

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // Between sequences: expect either a US-ASCII octet or the lead octet
                // of a multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif ((0xE0 & $octet) === 0xC0) {
                    // Lead octet of a 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $pending = 1;
                    $sequence_length = 2;
                } elseif ((0xF0 & $octet) === 0xE0) {
                    // Lead octet of a 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $pending = 2;
                    $sequence_length = 3;
                } elseif ((0xF8 & $octet) === 0xF0) {
                    // Lead octet of a 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $pending = 3;
                    $sequence_length = 4;
                } elseif ((0xFC & $octet) === 0xF8) {
                    // Lead octet of a 5 octet sequence: always illegal, but instead of
                    // resynchronizing we consume the sequence and let the length check
                    // at its end reject it.
                    $code_point = ($octet & 0x03) << 24;
                    $pending = 4;
                    $sequence_length = 5;
                } elseif ((0xFE & $octet) === 0xFC) {
                    // Lead octet of a 6 octet sequence, handled like the 5 octet case.
                    $code_point = ($octet & 1) << 30;
                    $pending = 5;
                    $sequence_length = 6;
                } else {
                    // Neither US-ASCII nor a legal lead octet.
                    return false;
                }

                continue;
            }

            // Inside a sequence: anything but a continuation octet means the
            // sequence is incomplete.
            if ((0xC0 & $octet) !== 0x80) {
                return false;
            }

            // Legal continuation: merge its 6 payload bits into the code point.
            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending !== 0) {
                continue;
            }

            // End of the multi-octet sequence: check for illegal sequences and code points.
            if (
                // From Unicode 3.1, non-shortest form is illegal
                ($sequence_length === 2 && $code_point < 0x0080)
                ||
                ($sequence_length === 3 && $code_point < 0x0800)
                ||
                ($sequence_length === 4 && $code_point < 0x10000)
                ||
                ($sequence_length > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($code_point > 0x10FFFF)
            ) {
                return false;
            }

            // reset for the next sequence ($pending is already 0 here)
            $code_point = 0;
            $sequence_length = 1;
        }

        // a sequence that is still open at the end of the input is invalid
        return $pending === 0;
    }
14408
14409
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The "common" table is always applied; the "full" table (loaded once from the
     * "caseFolding_full" data file) is applied afterwards if requested.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>
     *                                   If false (default), "lower" entries are replaced by their "upper"
     *                                   counterparts; if true, the replacement runs in the other direction.
     *                                   </p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $use_lowercase
            ? \str_replace($common_upper, $common_lower, $str)
            : \str_replace($common_lower, $common_upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is the search side when lowercasing, index 1 when uppercasing
        return $use_lowercase
            ? \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str)
            : \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
14462
14463
    /**
     * Includes "<this directory>/data/<$file>.php" and hands back the value returned by that file.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
14481
14482
    /**
     * Builds the emoji lookup caches on first use.
     *
     * Loads the "emoji" data file if needed, orders it by key length (longest key first) and
     * derives the key cache, the value cache and one reversible placeholder per key.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        // already initialized -> nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $first, string $second): int {
                // descending by byte length
                return \strlen($second) <=> \strlen($first);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            // placeholder built from the key's CRC32 and the reversed CRC32
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14517
14518
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>true only if MB_OVERLOAD_STRING is defined and that bit is set in the
     *              "mbstring.func_overload" INI value.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overload_flags = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($overload_flags & \MB_OVERLOAD_STRING);
    }
14539
14540
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop values that are empty after trim().</p>
     * @param int|null $remove_short_values <p>
     *                                      If not null, drop values whose mb_strlen() is smaller than or
     *                                      equal to this number.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: nothing is written back into $strings, so a reference
        // would only force an array copy and leave a dangling reference behind
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
14581
14582
    /**
     * rxClass
     *
     * Builds a regex fragment that matches any single character of $s. Results are memoized
     * per ($s, $class) pair for the lifetime of the process.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the character class, extended by the characters of $s.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A "[...]" character class or, if additional alternatives were collected,
     *                a "(?:...|...)" group.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // element 0 is the character class under construction,
        // every further element is a stand-alone alternative
        /** @var string[] $class_array */
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" must lead the class so that it cannot form a range
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape regex meta characters
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character made of three or more bytes: add it unescaped
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // NOTE: plain truthiness test - a class consisting only of "0" is left unwrapped
        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14640
14641
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split at the delimiter and every part gets its first character upper-cased, except:
     * - parts that are exactly one of the known lowercase particles ("van", "de", "of", ...),
     * - parts that start with one of the known prefixes ("mc", "d'", "al-", ...),
     * - and, only when the delimiter is "-", parts that start with one of the particles.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>Separator used to split and re-join the parts; an empty delimiter yields "".</p>
     * @param string $encoding  <p>Currently not used by this helper.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot split on an empty delimiter: it returns false (with a warning)
        // on PHP 7 and throws a ValueError on PHP 8, so bail out before calling it
        if ($delimiter === '') {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // beginnings that keep a part untouched; the particles only count for "-" separated parts
        $skip_beginnings = $delimiter === '-'
            ? \array_merge($special_cases['names'], $special_cases['prefixes'])
            : $special_cases['prefixes'];

        $name_helper_array = \explode($delimiter, $names);

        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($skip_beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    // leave this part as it is and go on with the next one
                    continue 2;
                }
            }

            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
14741
14742
    /**
     * Fold a string for collation matching without touching its case.
     *
     * The string is decomposed to Unicode normalization form D and every run of
     * non-spacing marks (\p{Mn}) is removed afterwards, so that a base letter and
     * its accented variants end up as the same sequence.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The folded string, an empty string if the normalization failed,
     *                     or whatever preg_replace() hands back for the mark removal.</p>
     */
    private static function strtonatfold(string $str)
    {
        $non_spacing_marks_rx = '/\p{Mn}+/u';

        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        /** @phpstan-ignore-next-line - https://github.com/JetBrains/phpstorm-stubs/pull/949 */
        if ($decomposed === false) {
            // the input could not be normalized
            return '';
        }

        return \preg_replace($non_spacing_marks_rx, '', $decomposed);
    }
14766
14767
    /**
     * Build the UTF-8 replacement for one input value, using the class lookup tables.
     *
     * The value is mapped through the "ord" table; if the result has an entry in the
     * "win1252_to_utf8" table that entry is returned. Otherwise two bytes are built:
     * the "chr" table entry for (ord / 64) OR-ed with 0xC0, followed by the input
     * AND-ed with 0x3F and OR-ed with 0x80.
     *
     * NOTE(review): the string bitwise operations presumably expect $input to be a
     * single byte - confirm against the callers.
     *
     * @param int|string $input
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // load the lookup tables on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // "/" yields a float whenever $ordC1 is not a multiple of 64; truncate it explicitly,
        // because using a fractional float as array offset is deprecated since PHP 8.1
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
        // the bitwise operators below are intentional: they work on the bytes of the strings
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
14806
14807
    /**
     * Rewrite non-standard "%uXXXX" URL escapes as hexadecimal HTML entities.
     *
     * Every "%u" that is followed by three or four hex digits becomes "&#x<digits>;",
     * e.g. "%u00e4" => "&#x00e4;". Strings without such an escape are returned unchanged.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to rewrite (or the check itself failed): hand the input back as it is
        if (!\preg_match($unicode_escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
14825
}
14826