Passed
Push — master ( b9cb9a...e03c84 )
by Lars
03:44
created

UTF8   F

Complexity

Total Complexity 1748

Size/Duplication

Total Lines 14688
Duplicated Lines 0 %

Test Coverage

Coverage 80.68%

Importance

Changes 106
Bugs 53 Features 6
Metric Value
eloc 4474
dl 0
loc 14688
ccs 3028
cts 3753
cp 0.8068
rs 0.8
c 106
b 53
f 6
wmc 1748

310 Methods

Rating   Name   Duplication   Size   Complexity  
B chr_to_decimal() 0 38 8
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 23 5
A count_chars() 0 11 1
D chr() 0 109 18
A chr_to_int() 0 3 1
A chunk_split() 0 3 1
A css_identifier() 0 56 6
A css_stripe_media_queries() 0 6 1
A clean() 0 48 6
A __construct() 0 2 1
B between() 0 48 8
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A char_at() 0 7 2
A chars() 0 4 1
A checkForSupport() 0 47 4
A chr_to_hex() 0 11 3
A collapse_whitespace() 0 8 2
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A chr_size_list() 0 17 3
A file_has_bom() 0 8 2
A filter_input() 0 16 3
A get_unique_string() 0 22 3
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A encode_mimeheader() 0 26 5
A ctype_loaded() 0 3 1
A has_uppercase() 0 8 2
A isBinary() 0 3 1
A emoji_decode() 0 18 2
A is_utf8() 0 13 4
A lcword() 0 13 1
A html_escape() 0 6 1
B get_file_type() 0 65 7
C is_utf16() 0 71 16
A isHtml() 0 3 1
C filter() 0 57 12
A isBase64() 0 3 1
A is_html() 0 14 2
A decode_mimeheader() 0 8 3
A html_decode() 0 6 1
A isUtf32() 0 3 1
A emoji_encode() 0 18 2
A is_alpha() 0 8 2
B get_random_string() 0 56 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A isUtf8() 0 3 1
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A is_ascii() 0 3 1
A is_blank() 0 8 2
D getCharDirection() 0 105 118
A htmlspecialchars() 0 15 3
A filter_var_array() 0 15 2
A decimal_to_chr() 0 3 1
A has_whitespace() 0 8 2
A lowerCaseFirst() 0 13 1
B is_binary() 0 38 9
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
B is_url() 0 44 7
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
F extract_text() 0 175 34
A json_loaded() 0 3 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A hasBom() 0 3 1
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A isAscii() 0 3 1
A filter_var() 0 15 2
A is_empty() 0 3 1
A isUtf16() 0 3 1
F encode() 0 147 37
C is_utf32() 0 71 16
A is_alphanumeric() 0 8 2
A json_decode() 0 14 2
A fix_simple_utf8() 0 32 4
A is_printable() 0 3 1
B is_json() 0 27 8
A int_to_hex() 0 7 2
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A is_base64() 0 17 5
A hex_to_int() 0 14 3
A htmlentities() 0 28 3
A hex_to_chr() 0 4 1
A isJson() 0 3 1
A filter_input_array() 0 15 3
A getSupportInfo() 0 13 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 59 13
B file_get_contents() 0 56 11
A emoji_from_country_code() 0 17 3
A str_substr_after_first_separator() 0 28 6
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 24 6
A remove_left() 0 24 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 61 13
A ltrim() 0 27 5
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 72 15
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A toUTF8() 0 3 1
B str_obfuscate() 0 47 8
A string() 0 16 4
D normalize_encoding() 0 147 16
B rxClass() 0 45 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 171 7
A normalize_whitespace() 0 9 1
A str_starts_with() 0 11 3
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strchr() 0 13 1
A strichr() 0 13 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 27 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 60 10
B urldecode() 0 51 8
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
A str_replace_first() 0 20 2
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 71 15
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A trim() 0 27 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
C substr_count() 0 65 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 21 3
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 42 11
B str_contains_all() 0 24 9
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 72 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
B rawurldecode() 0 51 8
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 16 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 138 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
A remove_right() 0 25 4
F strrpos() 0 122 25
A remove_html_breaks() 0 3 1
A showSupport() 0 17 3
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 99 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 15 3
A to_utf8() 0 14 3
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A str_upper_first() 0 13 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
B html_encode() 0 53 11
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C ord() 0 77 16
B to_string() 0 33 8
A strtonatfold() 0 7 1
C strcspn() 0 49 12
A fixStrCaseHelper() 0 41 5
B str_split_pattern() 0 49 11
D strstr() 0 95 18
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 21 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 138 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 42 5
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 60 10
A min() 0 14 3
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 7 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 137 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
     * Known byte order marks (BOM) => length in bytes.
     *
     * Every BOM is listed twice: once as its raw byte sequence and once in the
     * form it takes after being mis-read as "WINDOWS-1252" (in that form one
     * character may occupy more than one byte, hence the larger lengths).
     *
     * The mis-read UTF-8 BOM is written with escape sequences on purpose: the
     * literal characters look like a BOM to many tools and get stripped, which
     * would leave an empty-string key behind.
     *
     * NOTE(review): the two blank characters inside the UTF-32 "WINDOWS-1252"
     * keys are presumably NUL bytes (the mis-read form of 0x00) - confirm they
     * have not been replaced by ordinary spaces.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @psalm-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @psalm-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @psalm-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @psalm-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @psalm-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @psalm-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @psalm-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @psalm-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @psalm-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @psalm-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @psalm-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @psalm-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
     * Creates an instance.
     *
     * The constructor takes no arguments and its body is empty.
     */
    public function __construct()
    {
        // intentionally left empty
    }
252
253
    /**
     * Picks a single character out of a string by its position, similar to $str[1]
     * but counting characters instead of bytes.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string for an empty input
     *                or a negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing can be picked from an empty string or at a negative position.
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other encoding through self::substr().
        $character = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $character;
    }
279
280
    /**
     * Puts the UTF-8 BOM in front of the given string.
     *
     * INFO: A string for which UTF8::string_has_bom() reports a BOM is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str) ? $str : self::bom() . $str;
    }
302
303
    /**
     * Changes the case of all keys in an array.
     *
     * The values are copied over untouched; only the keys are converted. When two
     * keys become identical after the conversion, the later entry overwrites the
     * earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other value
     *                                       is treated as <strong>CASE_LOWER</strong></p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Everything that is not CASE_UPPER (including invalid values) means lower-casing.
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the elements are only read, so a by-reference loop
        // would just leave a dangling reference behind.
        foreach ($array as $key => $value) {
            $converted_key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$converted_key] = $value;
        }

        return $return;
    }
340
341
    /**
     * Extracts the text enclosed by a start and an end delimiter.
     *
     * The start delimiter is searched from $offset on, the end delimiter from the
     * character right after the start delimiter. An empty string is returned when
     * either delimiter is missing or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // UTF-8 is served by plain mbstring calls.
        if ($encoding === 'UTF-8') {
            $start_at = \mb_strpos($str, $start, $offset);
            if ($start_at === false) {
                return '';
            }

            $content_from = $start_at + (int) \mb_strlen($start);
            $content_to = \mb_strpos($str, $end, $content_from);
            if ($content_to === false || $content_to === $content_from) {
                return '';
            }

            return (string) \mb_substr($str, $content_from, $content_to - $content_from);
        }

        // Every other encoding is normalized first and handled by the class' own helpers.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $start_at = self::strpos($str, $start, $offset, $encoding);
        if ($start_at === false) {
            return '';
        }

        $content_from = $start_at + (int) self::strlen($start, $encoding);
        $content_to = self::strpos($str, $end, $content_from, $encoding);
        if ($content_to === false || $content_to === $content_from) {
            return '';
        }

        $content_length = $content_to - $content_from;

        return (string) self::substr($str, $content_from, $content_length, $encoding);
    }
406
407
    /**
     * Convert binary into a string.
     *
     * The bit string is read as one big-endian number: leading zero bits are
     * dropped, the remainder is left-padded to whole bytes and every group of
     * eight bits becomes one byte of the result. Working byte-wise keeps inputs
     * of any length exact and always yields complete bytes.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes, or an empty string for empty or all-zero input.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // Only binary digits carry information; anything else is skipped.
        $bits = (string) \preg_replace('/[^01]+/', '', (string) $bin);

        // Leading zeros do not change the number; an all-zero input decodes to nothing.
        $bits = \ltrim($bits, '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to a multiple of eight so that every chunk is a complete byte.
        $missing_bits = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing_bits) . $bits;

        $bytes = '';
        foreach (\str_split($bits, 8) as $octet) {
            $bytes .= \chr((int) \bindec($octet));
        }

        return $bytes;
    }
433
434
    /**
     * Provides the byte order mark (BOM) of UTF-8.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
450
451
    /**
     * Forwards both arguments unchanged to UTF8::chr_map() and returns its result.
     *
     * @alias of UTF8::chr_map()
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped_characters = self::chr_map($callback, $str);

        return $mapped_characters;
    }
467
468
    /**
     * Fetches the one character located at $index (zero-based).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 goes straight to mbstring, every other encoding through self::substr().
        $character = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $character;
    }
488
489
    /**
     * Splits the string into its characters by delegating to UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
504
505
    /**
     * Detects which UTF-8 related features are available and stores the findings
     * in self::$SUPPORT.
     *
     * The detection is skipped once the "already_checked_via_portable_utf8" flag is set.
     *
     * @return true|null
     *                   <p>true when the detection ran, null when it had already been done.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // Already detected: nothing to do.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        $mbstring_available = self::mbstring_loaded();
        self::$SUPPORT['mbstring'] = $mbstring_available;
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if ($mbstring_available === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // With the polyfill in use, the internal encoding is set to UTF-8 as well.
        $polyfill_in_use = self::symfony_polyfill_used();
        self::$SUPPORT['symfony_polyfill_used'] = $polyfill_in_use;
        if ($polyfill_in_use === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
560
561
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * Results are memoized per code point and encoding. Code points up to U+007F
     * are taken directly from the byte table, everything above is built via
     * "IntlChar" when available and otherwise assembled byte by byte.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (e.g. a code point <= 0 or beyond U+10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if ($code_point <= 0) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Only U+0001 - U+007F are single-byte in UTF-8. U+0080 already needs two
        // bytes ("\xC2\x80"), so it must not be served from the byte table, where
        // index 0x80 is a lone continuation byte.
        if ($code_point <= 0x7F) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            // IntlChar::chr() reports failure with null, which must not be re-encoded.
            if ($chr !== null && $encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $code_point = (int) $code_point;

        // Nothing beyond U+10FFFF is a code point; without this guard the lead-byte
        // index below could run past the end of the 0x00 - 0xFF byte table.
        if ($code_point > 0x10FFFF) {
            return null;
        }

        if ($code_point <= 0x7FF) {
            // two bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
686
687
    /**
     * Runs the callback once for every character of the string and collects the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
707
708
    /**
     * Builds a list with the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>The byte length of each character, in input order.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $use_mb_overload = self::$SUPPORT['mbstring_func_overload'] === true;
        $chars = self::str_split($str);

        if (!$use_mb_overload) {
            return \array_map('\strlen', $chars);
        }

        // "mb_" is available if overload is used, so count the bytes via a 8-bit charset ...
        $byte_length = static function (string $data): int {
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };

        return \array_map($byte_length, $chars);
    }
743
744
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character. Only the first character is decoded;
     *                     an empty string results in 0.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the character, or 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first byte: without this guard "unpack()" is
        // called on empty data and the fallback reads a missing string offset.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            // the "V" format needs four bytes, so only unpack a complete UCS-4 unit
            if ($chr_tmp !== false && isset($chr_tmp[3])) {
                $unpacked = \unpack('V', $chr_tmp);
                if ($unpacked !== false) {
                    /** @noinspection OffsetOperationsInspection */
                    return $unpacked[1];
                }
            }
        }

        //
        // fallback: decode the UTF-8 sequence by hand
        //

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte announces the sequence length; strip its marker bits
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            // a truncated sequence contributes zero bits instead of reading a missing offset
            $continuation = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
            $code = ($code << 6) + ($continuation & ~0x80);
        }

        return $code;
    }
796
797
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>The prefix that is handed to UTF8::int_to_hex().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity of the NUL character is processed like an empty character
        $input = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($input);

        return self::int_to_hex($code_point, $prefix);
    }
822
823
    /**
     * Alias of "UTF8::chr_to_decimal()".
     *
     * @param string $chr <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Whatever "UTF8::chr_to_decimal()" returns for the given character.</p>
     *
     * @see        UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
839
840
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) that are placed between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // the separator only goes between the chunks, nothing is appended after the last one
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
858
859
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
     *                                                        in combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
     *                                                        question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                                        </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        $valid_utf8_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';

        // only the first capture group (the valid sequences) is written back
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $str = (string) \preg_replace($valid_utf8_pattern, '$1', $str);

        // the optional steps below run in a fixed order
        $str = $replace_diamond_question_mark
            ? self::replace_diamond_question_mark($str)
            : $str;

        $str = $remove_invisible_characters
            ? self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded)
            : $str;

        $str = $normalize_whitespace
            ? self::normalize_whitespace($str, $keep_non_breaking_space)
            : $str;

        $str = $normalize_msword
            ? self::normalize_msword($str)
            : $str;

        return $remove_bom
            ? self::remove_bom($str)
            : $str;
    }
941
942
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string, it is cast to string before processing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for an empty input.</p>
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // the flags for "UTF8::clean()", named here so the call below stays readable
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;

        // remove all none UTF-8 symbols
        // && remove diamond question mark (�)
        // && remove invisible characters (e.g. "\0"), which is the default of "UTF8::clean()"
        // && remove BOM
        // && normalize whitespace chars (but keep non-breaking-spaces)
        return self::clean(
            $fixed,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark
        );
    }
979
980
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        // anything that is neither a string nor an array, and an empty list, yields no code points
        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($arg) || $arg === []) {
            return [];
        }

        $code_points = [];
        foreach ($arg as $key => $char) {
            // keys of the input are kept
            $code_point = self::ord($char);
            $code_points[$key] = $use_u_style ? self::int_to_hex($code_point) : $code_point;
        }

        return $code_points;
    }
1041
1042
    /**
     * Trims the string and replaces every run of whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the regex helper of this class
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
1063
1064
    /**
     * Counts how often each character occurs in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                        "mb_" functions; the flag is forwarded to UTF8::str_split().</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // one array entry per character
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
1093
1094
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copied from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" strings, applied before invalid characters are stripped.</p>
     * @param bool     $strip_tags <p>Set to true, to run "strip_tags()" on the input first.</p>
     * @param bool     $strtolower <p>Set to false, to keep the case of the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets an auto-generated identifier ...
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $double_underscore_replacements = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $double_underscore_replacements);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Replace temporary placeholder '##' with '__' only if the original
        // $identifier contained '__'.
        $restore_double_underscores = $double_underscore_replacements > 0;
        if ($restore_double_underscores) {
            $str = \str_replace('##', '__', $str);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher
        // We strip out any character not in the above list.
        $invalid_chars_pattern = '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u';
        $str = (string) \preg_replace($invalid_chars_pattern, '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $invalid_start_patterns = ['/^[0-9]/', '/^(-[0-9])|^(--)/'];
        $start_replacements = ['_', '__'];
        $str = (string) \preg_replace($invalid_start_patterns, $start_replacements, $str);

        return \trim($str, '-');
    }
1169
1170
    /**
     * Remove css media-queries.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The CSS source without the matched "@media" blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // matches "@media ... { ... }" blocks, including the nested rules
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($media_query_pattern, '', $str);

        return (string) $stripped;
    }
1187
1188
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1202
1203
    /**
     * Converts an int value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @psalm-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        // build the numeric HTML entity (e.g. "&#931;") and let the entity decoder do the work
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
1222
1223
    /**
     * Decodes a MIME header field.
     *
     * @param string $str      <p>The encoded MIME header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // these two names are used as they are, everything else is normalized first
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1244
1245
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * The flag is built from two "Regional Indicator Symbol" code points, one per letter
     * (U+1F1E6 stands for "A").
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE (the case of the letters does not matter)</p>
     *
     * @return string
     *                <p>Emoji or empty string on error, i.e. for anything that is not
     *                exactly two ASCII letters.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only two ASCII letters can be mapped. Checking the bytes (no "u" modifier)
        // also rejects two multi-byte characters, which would pass a pure
        // character-count check and then be indexed as single bytes below.
        if (\preg_match('/^[A-Za-z]{2}$/D', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        $emoji = '';
        for ($i = 0; $i < 2; ++$i) {
            // shift "A".."Z" onto the regional indicator symbols
            $code_point = self::ord($country_code_iso_3166_1[$i]) - $asciiOffset + $flagOffset;
            $emoji .= (self::chr($code_point) ?? '');
        }

        return $emoji;
    }
1273
1274
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the placeholder list that matches the mapping used while encoding
        $placeholders = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1314
1315
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the placeholder list the emoji chars are replaced with
        $placeholders = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $placeholders,
            $str
        );
    }
1355
1356
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              An empty string returns the input unchanged.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true (default), the current encoding of the
     *                                              string is detected and a detected encoding replaces
     *                                              $from_encoding; if nothing can be detected, the result of
     *                                              "UTF8::to_utf8()" is returned.<br>
     *                                              When false, $from_encoding is used as given, unless it is
     *                                              empty.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if the target is "JSON" and the string can not be json-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // nothing to convert
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as they are, all other names are normalized first
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // source and target are the same encoding: return the input as it is
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        //
        // pseudo-encodings: "JSON", "BASE64" and "HTML-ENTITIES" are handled before any charset conversion;
        // as a target they return directly, as a source they are decoded and the encoding is detected below
        //

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): in strict mode "base64_decode()" returns false for invalid input and the
            // value is passed on as it is - confirm that the following calls can deal with it
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // detect the source encoding, if requested or if there is no (remaining) source encoding
        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $from_encoding_auto_detected, $str, "\n\n");

        if ($from_encoding_auto_detected !== false) {
            // a detected encoding overrides the given source encoding
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // still no source encoding, or the detected one already equals the target
        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // shortcuts via the helpers of this class for the most common conversions
        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        // warn (but go on) if the target encoding is requested while mbstring is not available
        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            // a falsy result (false, but also '' or '0') falls through to "iconv" below
            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $return = @\iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        // last resort: the conversion failed, so the string is returned unconverted
        return $str;
    }
1534
1535
    /**
     * Encodes a string as a MIME header field via "iconv_mime_encode()".
     *
     * The field name handed to "iconv_mime_encode()" is always the empty string.
     *
     * @param string $str               <p>The header value to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
     * @param int    $indent            [optional] <p>Set the max line length (passed as "line-length").</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as given, every other name is normalized first
        $input_charset = ($from_charset === 'UTF-8' || $from_charset === 'CP850')
            ? $from_charset
            : self::normalize_encoding($from_charset, 'UTF-8');

        $output_charset = ($to_charset === 'UTF-8' || $to_charset === 'CP850')
            ? $to_charset
            : self::normalize_encoding($to_charset, 'UTF-8');

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $input_charset,
            'output-charset'   => $output_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1578
1579
    /**
     * Create an extract from a sentence; if the search-string is found, the extract tries to center it in the output.
     *
     * The text is cut at the next space or dot around the calculated window and skipped
     * text is marked with $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 input goes straight to the "mb_*" functions,
        // every other encoding goes through the wrappers of this class.
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // NOTE(review): in every "min(strpos(' '), strpos('.'))" below, a delimiter that is not found
        // yields false, and min() of false and an int is false (cast to 0) - so a cut position
        // is only used when BOTH delimiters occur after the offset. Confirm this is intended.

        // ------------------------------------------------------------------
        // no search-string: cut the text at the first delimiter after $length
        // ------------------------------------------------------------------

        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $string_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $string_length);
            }

            $pos = $is_utf8
                ? (int) \min(
                    \mb_strpos($str, ' ', $end),
                    \mb_strpos($str, '.', $end)
                )
                : (int) \min(
                    self::strpos($str, ' ', $end, $encoding),
                    self::strpos($str, '.', $end, $encoding)
                );

            if (!$pos) {
                return $str;
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, 0, $pos)
                : self::substr($str, 0, $pos, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ------------------------------------------------------------------
        // with search-string: calculate a window around the first match
        // ------------------------------------------------------------------

        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // move the start of the window back to the last space / dot before it
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $is_utf8
                ? \mb_substr($str, 0, $half_side)
                : self::substr($str, 0, $half_side, $encoding);

            if ($half_text !== false) {
                $pos_start = $is_utf8
                    ? (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    )
                    : (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
            }
        }

        if (!$word_position || $half_side <= 0) {
            // no usable match position or the window starts at the beginning:
            // keep the head of the text and only mark the skipped tail
            $offset = \min($length - 1, (int) self::strlen($str, $encoding));

            $pos_end = $is_utf8
                ? (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                )
                : (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                );

            if (!$pos_end) {
                return $str;
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, 0, $pos_end)
                : self::substr($str, 0, $pos_end, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));

        // length of the window, measured from $pos_start
        $pos_end = (
            $is_utf8
                ? (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                )
                : (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                )
        ) - $pos_start;

        if ($pos_end <= 0) {
            // no delimiter behind the window: take everything up to the end of the text
            $str_sub = $is_utf8
                ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
        }

        $str_sub = $is_utf8
            ? \mb_substr($str, $pos_start, $pos_end)
            : self::substr($str, $pos_start, $pos_end, $encoding);

        if ($str_sub === false) {
            return '';
        }

        return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }
1768
1769
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. Tags and NUL bytes are stripped and
     *                                        quotes are HTML-encoded before the name is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Prior to PHP 5, this parameter is called
     *                                        use_include_path and is a bool.
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
     *                                        to trigger include path
     *                                        search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If you don't need to use a
     *                                        custom context, you can skip this parameter by &null;.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout. It is only applied
     *                                        (as "http" stream option) if no $context is given.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 (E_DEPRECATED on every call),
        // so its default behavior is reproduced here: strip NUL bytes and tags, then
        // HTML-encode single and double quotes.
        //
        // NOTE(review): this mirrors the commonly used polyfill; exotic tag-like input may be
        // stripped slightly differently than by the native filter - confirm if that matters.
        $filename = \preg_replace('/\x00|<[^>]*>?/', '', $filename);
        if ($filename === null) {
            // preg_replace() failed
            return false;
        }
        $filename = \str_replace(["'", '"'], ['&#39;', '&#34;'], $filename);

        // only build a default context (with the timeout) if the caller did not provide one
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // the length argument is only passed if it was really given
        if (\is_int($max_length)) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8) {
            // convert non-binary data, and data that is detected as UTF-16 / UTF-32
            if (
                !self::is_binary($data, true)
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $from_encoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1871
1872
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * The whole file is read via "file_get_contents()" and the content is
     * then checked with "UTF8::string_has_bom()".
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1895
1896
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively (value by value), strings are
     * normalized and every other type is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var                <p>The value to filter.</p>
     * @param int                 $normalization_form <p>The form used for the "Normalizer" calls.</p>
     * @param string              $leading_combining  <p>The prefix for strings which start with a
     *                                                combining char.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @psalm-param TFilter $var
     * @psalm-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // break the reference, so that later writes can't touch the last element
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // pure ASCII needs no normalization
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // placeholder: keeps the isset() check below true for already normalized input
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // no usable result from the normalizer: re-encode the input instead
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_char = $var[0] >= "\x80"
                                              &&
                                              isset($normalized[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_char) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @psalm-var TFilter $var */
        $var = $var;

        return $var;
    }
1971
1972
    /**
     * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it,
     * the result is then passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // the options are only forwarded to the native function if they were really given

        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $var = ($options === null || \func_num_args() < 4)
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($var);
    }
2027
2028
    /**
     * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them,
     * the result is then passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int        $type       <p>
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                               <b>INPUT_ENV</b>.
     *                               </p>
     * @param array|null $definition [optional] <p>
     *                               An array defining the arguments. A valid key is a string
     *                               containing a variable name and a valid value is either a filter type, or an array
     *                               optionally specifying the filter, flags and options. If the value is an
     *                               array, valid keys are filter which specifies the
     *                               filter type,
     *                               flags which specifies any flags that apply to the
     *                               filter, and options which specifies any options that
     *                               apply to the filter. See the example below for a better understanding.
     *                               </p>
     *                               <p>
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
     *                               input array are filtered by this filter.
     *                               </p>
     * @param bool       $add_empty  [optional] <p>
     *                               Add missing keys as <b>NULL</b> to the return value.
     *                               </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // without a definition, the native function is called with the input type only

        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $filtered = ($definition === null || \func_num_args() < 2)
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($filtered);
    }
2089
2090
    /**
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter,
     * the result is then passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        <b>NULL</b> is handled as "no options given".
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                        'options' => array(
     *                                        'default' => 3, // value to return if the filter fails
     *                                        // other options here
     *                                        'min_range' => 0
     *                                        ),
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                        // Expected format: Surname, GivenNames
     *                                        if (strpos($value, ", ") === false) return false;
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
     *                                        $empty = (empty($surname) || empty($givennames));
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                        if ($empty || $notstrings) {
     *                                        return false;
     *                                        } else {
     *                                        return $value;
     *                                        }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // An explicit null must not be forwarded: PHP 8 declares the native $options
        // parameter as "array|int", so null is not a valid value for it. Skipping it
        // matches what UTF8::filter_input() does for its own $options parameter.

        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2169
2170
    /**
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them,
     * the result is then passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'foo@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are filter
     *                                   which specifies the filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter. See the example below for a better understanding.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   <b>NULL</b> is handled as "no definition given".
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   It is only forwarded together with a definition.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // An explicit null must not be forwarded: PHP 8 declares the native parameter
        // as "array|int", so null is not a valid value for it. Skipping it matches what
        // UTF8::filter_input_array() does for its own $definition parameter.

        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2240
2241
    /**
     * Checks whether the "finfo" class exists in the running PHP installation.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        // "\finfo::class" is resolved at compile time to the plain string 'finfo'
        $finfo_available = \class_exists(\finfo::class);

        return $finfo_available;
    }
2255
2256
    /**
     * Returns the leading $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing can be extracted for a non-positive length or from an empty string
        if ($n <= 0 || $str === '') {
            return '';
        }

        // the UTF-8 path calls mb_substr() without an explicit encoding argument;
        // every other encoding is delegated to the class' own substr()
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2282
2283
    /**
     * Check if the number of Unicode characters isn't greater than the specified integer.
     *
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true</code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        // the length is cast to int before it is compared with the box size
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2300
2301
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement map is loaded via "getData('utf8_fix')" on first use and then kept
     * in static variables, so later calls only perform the "str_replace()".
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        // first call: build the search list (map keys) and the replace list (the map itself)
        if ($search_cache === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $fix_map = self::$BROKEN_UTF8_FIX;
            $search_cache = \array_keys($fix_map);
            $replace_cache = $fix_map;
        }

        \assert(\is_array($replace_cache));

        return \str_replace($search_cache, $replace_cache, $str);
    }
2351
2352
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃÂ©dÃÂ©ration'); // 'Fédération'</code>
     *
     * A string is run through "utf8_decode()" + "to_utf8()" repeatedly until the result
     * no longer changes; arrays are processed element by element (recursively), keys are kept.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return \array_map(
                static function ($item) {
                    return self::fix_utf8($item);
                },
                $str
            );
        }

        $current = (string) $str;

        // an empty string is already a fixed point, no conversion round is needed
        if ($current === '') {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $current;
        }

        do {
            $previous = $current;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($previous, true)
            );
        } while ($previous !== $current);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2398
2399
    /**
     * Get the text direction of a specific character.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * If "IntlChar" support is flagged as available its direction code is used first;
     * when that code is neither a known LTR nor a known RTL code (or "IntlChar" is not
     * available) a built-in table of right-to-left code points decides.
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltr_codes = [0, 11, 12, 20];
            $rtl_codes = [1, 13, 14, 15, 21];

            if (\in_array($intl_direction, $ltr_codes, true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $rtl_codes, true)) {
                return 'RTL';
            }

            // every other result is resolved by the table lookup below
        }

        $c = static::chr_to_decimal($char);

        // all right-to-left entries of the table lie inside 0x5be..0x10b7f
        if ($c < 0x5be || $c > 0x10b7f) {
            return 'LTR';
        }

        // single right-to-left code points (compared strictly)
        $rtl_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtl_code_points, true)) {
            return 'RTL';
        }

        // inclusive [first, last] ranges of right-to-left code points
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2517
2518
    /**
     * Check for php-support.
     *
     * When a key is requested, the transliterator list is loaded (once) and copied into
     * the support array under "intl__transliterator_list_ids" before the lookup happens.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        // no key given: hand back the complete support array
        return self::$SUPPORT;
    }
2544
2545
    /**
     * Detects the file type from the first two bytes ("magic number") of the given data.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // at least two bytes are required to read the signature
        if (!isset($str[1])) {
            return $fallback;
        }

        $signature = $str[0] . $str[1];

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // pdf, exe, midi, zip, rar, gif, tiff, bmp, ...
        //
        if ($signature === "\xFF\xD8") {
            // bytes 255 + 216
            $ext = 'jpg';
            $mime = 'image/jpeg';
        } elseif ($signature === "\x89\x50") {
            // bytes 137 + 80
            $ext = 'png';
            $mime = 'image/png';
        } else {
            return $fallback;
        }

        return [
            'ext'  => $ext,
            'mime' => $mime,
            'type' => 'binary',
        ];
    }
2626
2627
    /**
     * Builds a string of $length characters, each picked at a random position of $possible_chars.
     *
     * Positions are drawn with "random_int()"; if that throws, "mt_rand()" is used instead.
     * An empty $possible_chars yields an empty string.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // choose the length / single-char lookup matching the requested encoding
        if ($encoding === 'UTF-8') {
            $pool_size = (int) \mb_strlen($possible_chars);
            $pick_char = static function (int $offset) use ($possible_chars) {
                return \mb_substr($possible_chars, $offset, 1);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $pool_size = (int) self::strlen($possible_chars, $encoding);
            $pick_char = static function (int $offset) use ($possible_chars, $encoding) {
                return self::substr($possible_chars, $offset, 1, $encoding);
            };
        }

        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $offset = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $offset = \mt_rand(0, $pool_size - 1);
            }

            // only a successful lookup counts towards the requested length
            $char = $pick_char($offset);
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2691
2692
    /**
     * Builds a unique string via "uniqid()", prefixed with a random integer, the session id,
     * the remote / server address (when present in $_SERVER) and the given extra entropy.
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        $upper_bound = \mt_getrandmax();

        try {
            $random_part = \random_int(0, $upper_bound);
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random_part = \mt_rand(0, $upper_bound);
        }

        $entropy = $random_part
                   . \session_id()
                   . ($_SERVER['REMOTE_ADDR'] ?? '')
                   . ($_SERVER['SERVER_ADDR'] ?? '')
                   . $extra_entropy;

        $unique = \uniqid($entropy, true);

        // the hash variant mixes the entropy in a second time
        return $use_md5 ? \md5($unique . $entropy) : $unique;
    }
2721
2722
    /**
     * Deprecated alias: forwards to "UTF8::string_has_bom()" and returns its result.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2738
2739
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * Uses "mb_ereg_match()" when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2758
2759
    /**
     * Returns true if the string contains whitespace, false otherwise.
     *
     * Uses "mb_ereg_match()" when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2778
2779
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * Uses "mb_ereg_match()" when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2798
2799
    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * The value is parsed with "hexdec()" and the resulting integer is handed to
     * "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2817
2818
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "U+" or "\u".
     * Anything else (including letters outside a-f) is treated as a failure.
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // only real hex digits are captured, so e.g. "U+zzzz" is rejected
        // instead of being silently converted to 0 by intval()
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match) === 1) {
            return \intval($match[1], 16);
        }

        return false;
    }
2847
2848
    /**
     * Deprecated alias: forwards all arguments to "UTF8::html_entity_decode()".
     *
     * @param string   $str
     * @param int|null $flags
     * @param string   $encoding
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2869
2870
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * With mbstring available the conversion is done by "mb_encode_numericentity()";
     * otherwise every character is encoded via "UTF8::single_chr_html_encode()".
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // A convmap consists of groups of exactly four integers:
            // [first code point, last code point, offset, mask].
            // A trailing fifth element makes newer PHP versions reject the map.
            $convmap = [
                $keep_ascii_chars ? 0x80 : 0x00,
                0xfffff,
                0,
                0xfffff,
            ];

            // the encoding is always passed explicitly, so the result does not
            // depend on the configured mbstring internal encoding
            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2941
2942
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so we are left with our own little solution here. Bummer.
     *
     * Convert all HTML entities to their applicable characters. Decoding is repeated
     * until the string no longer changes, so multiple-encoded entities are resolved too.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string   $str      <p>
     *                           The input string.
     *                           </p>
     * @param int|null $flags    [optional] <p>
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
     *                           and which document type to use. If <b>NULL</b> is given,
     *                           ENT_QUOTES | ENT_HTML5 is used.
     *                           <ul>
     *                           <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                           <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                           <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                           <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                           <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                           <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                           <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                           </ul>
     *                           </p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // shorter than four bytes (examples: "&;" or "&x;") or no "&" at all: nothing to decode
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // keep decoding while an "&" is left and the previous pass still changed something
        while (\strpos($str, '&') !== false) {
            $before_pass = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // append the missing ";" to numeric entities first
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($before_pass === $str) {
                break;
            }
        }

        return $str;
    }
3075
3076
    /**
3077
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
3078
     *
3079
     * @param string $str
3080
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3081
     *
3082
     * @psalm-pure
3083
     *
3084
     * @return string
3085
     */
3086 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Convert both quote styles and substitute invalid code unit sequences.
        $escape_flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escape_flags, $encoding);
    }
3094
3095
    /**
3096
     * Remove empty html-tag.
3097
     *
3098
     * e.g.: <pre><tag></tag></pre>
3099
     *
3100
     * @param string $str
3101
     *
3102
     * @psalm-pure
3103
     *
3104
     * @return string
3105
     */
3106 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // Matches an opening tag, optional whitespace and then a closing tag.
        // The closing tag's name is not required to match the opening one.
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on failure (e.g. malformed UTF-8 combined
        // with the "u" modifier). Casting that to string would silently throw
        // the whole input away, so return the input unchanged instead.
        return $stripped ?? $str;
    }
3114
3115
    /**
3116
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3117
     *
3118
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3119
     *
3120
     * @see http://php.net/manual/en/function.htmlentities.php
3121
     *
3122
     * @param string $str           <p>
3123
     *                              The input string.
3124
     *                              </p>
3125
     * @param int    $flags         [optional] <p>
3126
     *                              A bitmask of one or more of the following flags, which specify how to handle
3127
     *                              quotes, invalid code unit sequences and the used document type. The default is
3128
     *                              ENT_COMPAT | ENT_HTML401.
3129
     *                              <table>
3130
     *                              Available <i>flags</i> constants
3131
     *                              <tr valign="top">
3132
     *                              <td>Constant Name</td>
3133
     *                              <td>Description</td>
3134
     *                              </tr>
3135
     *                              <tr valign="top">
3136
     *                              <td><b>ENT_COMPAT</b></td>
3137
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3138
     *                              </tr>
3139
     *                              <tr valign="top">
3140
     *                              <td><b>ENT_QUOTES</b></td>
3141
     *                              <td>Will convert both double and single quotes.</td>
3142
     *                              </tr>
3143
     *                              <tr valign="top">
3144
     *                              <td><b>ENT_NOQUOTES</b></td>
3145
     *                              <td>Will leave both double and single quotes unconverted.</td>
3146
     *                              </tr>
3147
     *                              <tr valign="top">
3148
     *                              <td><b>ENT_IGNORE</b></td>
3149
     *                              <td>
3150
     *                              Silently discard invalid code unit sequences instead of returning
3151
     *                              an empty string. Using this flag is discouraged as it
3152
     *                              may have security implications.
3153
     *                              </td>
3154
     *                              </tr>
3155
     *                              <tr valign="top">
3156
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3157
     *                              <td>
3158
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3159
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3160
     *                              string.
3161
     *                              </td>
3162
     *                              </tr>
3163
     *                              <tr valign="top">
3164
     *                              <td><b>ENT_DISALLOWED</b></td>
3165
     *                              <td>
3166
     *                              Replace invalid code points for the given document type with a
3167
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3168
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3169
     *                              instance, to ensure the well-formedness of XML documents with
3170
     *                              embedded external content.
3171
     *                              </td>
3172
     *                              </tr>
3173
     *                              <tr valign="top">
3174
     *                              <td><b>ENT_HTML401</b></td>
3175
     *                              <td>
3176
     *                              Handle code as HTML 4.01.
3177
     *                              </td>
3178
     *                              </tr>
3179
     *                              <tr valign="top">
3180
     *                              <td><b>ENT_XML1</b></td>
3181
     *                              <td>
3182
     *                              Handle code as XML 1.
3183
     *                              </td>
3184
     *                              </tr>
3185
     *                              <tr valign="top">
3186
     *                              <td><b>ENT_XHTML</b></td>
3187
     *                              <td>
3188
     *                              Handle code as XHTML.
3189
     *                              </td>
3190
     *                              </tr>
3191
     *                              <tr valign="top">
3192
     *                              <td><b>ENT_HTML5</b></td>
3193
     *                              <td>
3194
     *                              Handle code as HTML 5.
3195
     *                              </td>
3196
     *                              </tr>
3197
     *                              </table>
3198
     *                              </p>
3199
     * @param string $encoding      [optional] <p>
3200
     *                              Like <b>htmlspecialchars</b>,
3201
     *                              <b>htmlentities</b> takes an optional third argument
3202
     *                              <i>encoding</i> which defines encoding used in
3203
     *                              conversion.
3204
     *                              Although this argument is technically optional, you are highly
3205
     *                              encouraged to specify the correct value for your code.
3206
     *                              </p>
3207
     * @param bool   $double_encode [optional] <p>
3208
     *                              When <i>double_encode</i> is turned off PHP will not
3209
     *                              encode existing html entities. The default is to convert everything.
3210
     *                              </p>
3211
     *
3212
     * @psalm-pure
3213
     *
3214
     * @return string
3215
     *                <p>
3216
     *                The encoded string.
3217
     *                <br><br>
3218
     *                If the input <i>string</i> contains an invalid code unit
3219
     *                sequence within the given <i>encoding</i> an empty string
3220
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3221
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3222
     *                </p>
3223
     */
3224 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; any other name is normalized first.
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * The native htmlentities() does not convert a backslash to an HTML
         * entity, so every backslash is replaced by its numeric entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // Final pass through the class's own html_encode() helper.
        return self::html_encode($encoded, true, $encoding);
    }
3253
3254
    /**
3255
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3256
     *
3257
     * INFO: Take a look at "UTF8::htmlentities()"
3258
     *
3259
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3260
     *
3261
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3262
     *
3263
     * @param string $str           <p>
3264
     *                              The string being converted.
3265
     *                              </p>
3266
     * @param int    $flags         [optional] <p>
3267
     *                              A bitmask of one or more of the following flags, which specify how to handle
3268
     *                              quotes, invalid code unit sequences and the used document type. The default is
3269
     *                              ENT_COMPAT | ENT_HTML401.
3270
     *                              <table>
3271
     *                              Available <i>flags</i> constants
3272
     *                              <tr valign="top">
3273
     *                              <td>Constant Name</td>
3274
     *                              <td>Description</td>
3275
     *                              </tr>
3276
     *                              <tr valign="top">
3277
     *                              <td><b>ENT_COMPAT</b></td>
3278
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3279
     *                              </tr>
3280
     *                              <tr valign="top">
3281
     *                              <td><b>ENT_QUOTES</b></td>
3282
     *                              <td>Will convert both double and single quotes.</td>
3283
     *                              </tr>
3284
     *                              <tr valign="top">
3285
     *                              <td><b>ENT_NOQUOTES</b></td>
3286
     *                              <td>Will leave both double and single quotes unconverted.</td>
3287
     *                              </tr>
3288
     *                              <tr valign="top">
3289
     *                              <td><b>ENT_IGNORE</b></td>
3290
     *                              <td>
3291
     *                              Silently discard invalid code unit sequences instead of returning
3292
     *                              an empty string. Using this flag is discouraged as it
3293
     *                              may have security implications.
3294
     *                              </td>
3295
     *                              </tr>
3296
     *                              <tr valign="top">
3297
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3298
     *                              <td>
3299
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3300
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3301
     *                              string.
3302
     *                              </td>
3303
     *                              </tr>
3304
     *                              <tr valign="top">
3305
     *                              <td><b>ENT_DISALLOWED</b></td>
3306
     *                              <td>
3307
     *                              Replace invalid code points for the given document type with a
3308
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3309
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3310
     *                              instance, to ensure the well-formedness of XML documents with
3311
     *                              embedded external content.
3312
     *                              </td>
3313
     *                              </tr>
3314
     *                              <tr valign="top">
3315
     *                              <td><b>ENT_HTML401</b></td>
3316
     *                              <td>
3317
     *                              Handle code as HTML 4.01.
3318
     *                              </td>
3319
     *                              </tr>
3320
     *                              <tr valign="top">
3321
     *                              <td><b>ENT_XML1</b></td>
3322
     *                              <td>
3323
     *                              Handle code as XML 1.
3324
     *                              </td>
3325
     *                              </tr>
3326
     *                              <tr valign="top">
3327
     *                              <td><b>ENT_XHTML</b></td>
3328
     *                              <td>
3329
     *                              Handle code as XHTML.
3330
     *                              </td>
3331
     *                              </tr>
3332
     *                              <tr valign="top">
3333
     *                              <td><b>ENT_HTML5</b></td>
3334
     *                              <td>
3335
     *                              Handle code as HTML 5.
3336
     *                              </td>
3337
     *                              </tr>
3338
     *                              </table>
3339
     *                              </p>
3340
     * @param string $encoding      [optional] <p>
3341
     *                              Defines encoding used in conversion.
3342
     *                              </p>
3343
     *                              <p>
3344
     *                              For the purposes of this function, the encodings
3345
     *                              ISO-8859-1, ISO-8859-15,
3346
     *                              UTF-8, cp866,
3347
     *                              cp1251, cp1252, and
3348
     *                              KOI8-R are effectively equivalent, provided the
3349
     *                              <i>string</i> itself is valid for the encoding, as
3350
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3351
     *                              the same positions in all of these encodings.
3352
     *                              </p>
3353
     * @param bool   $double_encode [optional] <p>
3354
     *                              When <i>double_encode</i> is turned off PHP will not
3355
     *                              encode existing html entities, the default is to convert everything.
3356
     *                              </p>
3357
     *
3358
     * @psalm-pure
3359
     *
3360
     * @return string <p>The converted string.
3361
     *                </p>
3362
     *                <p>
3363
     *                If the input <i>string</i> contains an invalid code unit
3364
     *                sequence within the given <i>encoding</i> an empty string
3365
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3366
     *                <b>ENT_SUBSTITUTE</b> flags are set.</p>
3367
     */
3368 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; any other name is normalized first.
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Delegate the actual conversion to the native implementation.
        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
3385
3386
    /**
3387
     * Checks whether iconv is available on the server.
3388
     *
3389
     * @psalm-pure
3390
     *
3391
     * @return bool
3392
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3393
     *
3394
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3395
     */
3396
    public static function iconv_loaded(): bool
    {
        // Ask the engine whether the "iconv" extension is loaded.
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3400
3401
    /**
3402
     * alias for "UTF8::decimal_to_chr()"
3403
     *
3404
     * @param int|string $int
3405
     *
3406
     * @psalm-param int|numeric-string $int
3407
     *
3408
     * @psalm-pure
3409
     *
3410
     * @return string
3411
     *
3412
     * @see        UTF8::decimal_to_chr()
3413
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
3414
     */
3415 4
    public static function int_to_chr($int): string
    {
        // Deprecated alias: forward unchanged to decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3419
3420
    /**
3421
     * Converts Integer to hexadecimal U+xxxx code point representation.
3422
     *
3423
     * INFO: opposite to UTF8::hex_to_int()
3424
     *
3425
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3426
     *
3427
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3428
     * @param string $prefix [optional]
3429
     *
3430
     * @psalm-pure
3431
     *
3432
     * @return string the prefix followed by the hexadecimal code point, zero-padded to at least four digits
3433
     */
3434 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros to at least four hex digits; longer values stay as they are.
        $padded_hex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $padded_hex;
    }
3442
3443
    /**
3444
     * Checks whether intl-char is available on the server.
3445
     *
3446
     * @psalm-pure
3447
     *
3448
     * @return bool
3449
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3450
     *
3451
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3452
     */
3453
    public static function intlChar_loaded(): bool
    {
        // Availability is detected by the presence of the IntlChar class.
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3457
3458
    /**
3459
     * Checks whether intl is available on the server.
3460
     *
3461
     * @psalm-pure
3462
     *
3463
     * @return bool
3464
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3465
     *
3466
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3467
     */
3468 5
    public static function intl_loaded(): bool
    {
        // Ask the engine whether the "intl" extension is loaded.
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3472
3473
    /**
3474
     * alias for "UTF8::is_ascii()"
3475
     *
3476
     * @param string $str
3477
     *
3478
     * @psalm-pure
3479
     *
3480
     * @return bool
3481
     *
3482
     * @see        UTF8::is_ascii()
3483
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
3484
     */
3485 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: the check itself is done by the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3489
3490
    /**
3491
     * alias for "UTF8::is_base64()"
3492
     *
3493
     * @param string $str
3494
     *
3495
     * @psalm-pure
3496
     *
3497
     * @return bool
3498
     *
3499
     * @see        UTF8::is_base64()
3500
     * @deprecated <p>please use "UTF8::is_base64()"</p>
3501
     */
3502 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: forward unchanged to is_base64().
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3506
3507
    /**
3508
     * alias for "UTF8::is_binary()"
3509
     *
3510
     * @param int|string $str
3511
     * @param bool       $strict
3512
     *
3513
     * @psalm-pure
3514
     *
3515
     * @return bool
3516
     *
3517
     * @see        UTF8::is_binary()
3518
     * @deprecated <p>please use "UTF8::is_binary()"</p>
3519
     */
3520 4
    public static function isBinary($str, bool $strict = false): bool
    {
        // Deprecated alias: forward both arguments unchanged to is_binary().
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3524
3525
    /**
3526
     * alias for "UTF8::is_bom()"
3527
     *
3528
     * @param string $utf8_chr
3529
     *
3530
     * @psalm-pure
3531
     *
3532
     * @return bool
3533
     *
3534
     * @see        UTF8::is_bom()
3535
     * @deprecated <p>please use "UTF8::is_bom()"</p>
3536
     */
3537 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: forward unchanged to is_bom().
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3541
3542
    /**
3543
     * alias for "UTF8::is_html()"
3544
     *
3545
     * @param string $str
3546
     *
3547
     * @psalm-pure
3548
     *
3549
     * @return bool
3550
     *
3551
     * @see        UTF8::is_html()
3552
     * @deprecated <p>please use "UTF8::is_html()"</p>
3553
     */
3554 2
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: forward unchanged to is_html().
        $is_html = self::is_html($str);

        return $is_html;
    }
3558
3559
    /**
3560
     * alias for "UTF8::is_json()"
3561
     *
3562
     * @param string $str
3563
     *
3564
     * @return bool
3565
     *
3566
     * @see        UTF8::is_json()
3567
     * @deprecated <p>please use "UTF8::is_json()"</p>
3568
     */
3569 1
    public static function isJson(string $str): bool
    {
        // Deprecated alias: forward unchanged to is_json().
        $is_json = self::is_json($str);

        return $is_json;
    }
3573
3574
    /**
3575
     * alias for "UTF8::is_utf16()"
3576
     *
3577
     * @param string $str
3578
     *
3579
     * @psalm-pure
3580
     *
3581
     * @return false|int
3582
     *                   <strong>false</strong> if it's not UTF-16,<br>
3583
     *                   <strong>1</strong> for UTF-16LE,<br>
3584
     *                   <strong>2</strong> for UTF-16BE
3585
     *
3586
     * @see        UTF8::is_utf16()
3587
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
3588
     */
3589 2
    public static function isUtf16($str)
    {
        // Deprecated alias: forward unchanged to is_utf16() and pass its result through.
        $utf16_result = self::is_utf16($str);

        return $utf16_result;
    }
3593
3594
    /**
3595
     * alias for "UTF8::is_utf32()"
3596
     *
3597
     * @param string $str
3598
     *
3599
     * @psalm-pure
3600
     *
3601
     * @return false|int
3602
     *                   <strong>false</strong> if it's not UTF-32,
3603
     *                   <strong>1</strong> for UTF-32LE,
3604
     *                   <strong>2</strong> for UTF-32BE
3605
     *
3606
     * @see        UTF8::is_utf32()
3607
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
3608
     */
3609 2
    public static function isUtf32($str)
    {
        // Deprecated alias: forward unchanged to is_utf32() and pass its result through.
        $utf32_result = self::is_utf32($str);

        return $utf32_result;
    }
3613
3614
    /**
3615
     * alias for "UTF8::is_utf8()"
3616
     *
3617
     * @param string $str
3618
     * @param bool   $strict
3619
     *
3620
     * @psalm-pure
3621
     *
3622
     * @return bool
3623
     *
3624
     * @see        UTF8::is_utf8()
3625
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
3626
     */
3627 17
    public static function isUtf8($str, bool $strict = false): bool
    {
        // Deprecated alias: forward both arguments unchanged to is_utf8().
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3631
3632
    /**
3633
     * Returns true if the string contains only alphabetic chars, false otherwise.
3634
     *
3635
     * @param string $str <p>The input string.</p>
3636
     *
3637
     * @psalm-pure
3638
     *
3639
     * @return bool
3640
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3641
     */
3642 10
    public static function is_alpha(string $str): bool
    {
        // Zero or more alphabetic characters, so the empty string matches too.
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3651
3652
    /**
3653
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3654
     *
3655
     * @param string $str <p>The input string.</p>
3656
     *
3657
     * @psalm-pure
3658
     *
3659
     * @return bool
3660
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3661
     */
3662 13
    public static function is_alphanumeric(string $str): bool
    {
        // Zero or more alphanumeric characters, so the empty string matches too.
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3671
3672
    /**
3673
     * Returns true if the string contains only punctuation chars, false otherwise.
3674
     *
3675
     * @param string $str <p>The input string.</p>
3676
     *
3677
     * @psalm-pure
3678
     *
3679
     * @return bool
3680
     *              <p>Whether or not $str contains only punctuation chars.</p>
3681
     */
3682 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation characters, so the empty string matches too.
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3686
3687
    /**
3688
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3689
     *
3690
     * @param string $str <p>The input string.</p>
3691
     *
3692
     * @psalm-pure
3693
     *
3694
     * @return bool
3695
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3696
     */
3697 1
    public static function is_printable(string $str): bool
    {
        // The string is printable when removing invisible characters changes nothing.
        $without_invisible = self::remove_invisible_characters($str);

        return $without_invisible === $str;
    }
3701
3702
    /**
3703
     * Checks if a string is 7 bit ASCII.
3704
     *
3705
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3706
     *
3707
     * @param string $str <p>The string to check.</p>
3708
     *
3709
     * @psalm-pure
3710
     *
3711
     * @return bool
3712
     *              <p>
3713
     *              <strong>true</strong> if it is ASCII<br>
3714
     *              <strong>false</strong> otherwise
3715
     *              </p>
3716
     */
3717 8
    public static function is_ascii(string $str): bool
    {
        // The check itself is done by the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3721
3722
    /**
3723
     * Returns true if the string is base64 encoded, false otherwise.
3724
     *
3725
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3726
     *
3727
     * @param string|null $str                   <p>The input string.</p>
3728
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3729
     *
3730
     * @psalm-pure
3731
     *
3732
     * @return bool
3733
     *              <p>Whether or not $str is base64 encoded.</p>
3734
     */
3735 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // The empty string only counts as base64 when the caller allows it.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // null (or any other non-string value) is never base64.
        if (!\is_string($str)) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Only accept input that re-encodes to exactly the same string.
        return \base64_encode($decoded) === $str;
    }
3753
3754
    /**
3755
     * Check if the input is binary... (this looks like a hack).
3756
     *
3757
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3758
     *
3759
     * @param int|string $input
3760
     * @param bool       $strict
3761
     *
3762
     * @psalm-pure
3763
     *
3764
     * @return bool
3765
     */
3766 40
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;

        // An empty value is never considered binary.
        if ($data === '') {
            return false;
        }

        // A string made up solely of "0" and "1" counts as binary.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        // Let the file-type detection decide next.
        $file_type = self::get_file_type($data);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than a quarter NUL bytes is treated as binary.
        $byte_count = \strlen($data);
        $null_byte_count = \substr_count($data, "\x0", 0, $byte_count);
        if (($null_byte_count / $byte_count) > 0.25) {
            return true;
        }

        // The remaining check is only performed in strict mode.
        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        // Binary only when fileinfo reports the MIME encoding "binary".
        return $finfo_encoding === 'binary';
    }
3805
3806
    /**
3807
     * Check if the file is binary.
3808
     *
3809
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3810
     *
3811
     * @param string $file
3812
     *
3813
     * @return bool
3814
     */
3815 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // A file that cannot be opened is reported as "not binary".
        if (!\is_resource($handle)) {
            return false;
        }

        // Only the first 512 bytes are inspected.
        $head = \fread($handle, 512);
        \fclose($handle);

        // Nothing could be read: empty file or read failure.
        if ($head === false || $head === '') {
            return false;
        }

        // Run the strict binary check on the sampled block.
        return self::is_binary($head, true);
    }
3832
3833
    /**
     * Tells whether a string consists of whitespace characters only.
     *
     * The check is done with the POSIX class "[[:space:]]"; the pattern allows
     * zero characters, so it does not require at least one whitespace char.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $whitespace_only = '^[[:space:]]*$';

        // fallback: no mbstring, use the library's own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $whitespace_only);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($whitespace_only, $str);
    }
3852
3853
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The whole string has to be identical to one of the known BOMs (the keys
     * of self::$BOM); a string that merely starts with a BOM does not match.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // Only the keys (the BOM byte sequences) matter here. Comparing against
        // them strictly keeps the original semantics without iterating the
        // shared static array by reference.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3878
3879
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty when it converts to boolean FALSE, e.g.
     * "", "0", 0, 0.0, [] or null.
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // $str is always defined here, so empty() reduces to a falsiness check
        return !$str;
    }
3896
3897
    /**
     * Tells whether a string consists of hexadecimal characters only.
     *
     * The check is done with the POSIX class "[[:xdigit:]]"; the pattern allows
     * zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $hex_only = '^[[:xdigit:]]*$';

        // fallback: no mbstring, use the library's own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $hex_only);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($hex_only, $str);
    }
3916
3917
    /**
     * Check if the string contains any HTML tags.
     *
     * An opening, closing or self-closing tag (optionally with attributes)
     * anywhere in the string is enough for a positive result.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        $found = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $found);

        // an empty result array means "no tag found"
        return \count($found) > 0;
    }
3944
3945
    /**
     * Check if $url is a correct url.
     *
     * Only URLs starting with "http://" or "https://" (case-insensitive) are
     * accepted. With $disallow_localhost the check also rejects URLs that
     * point to the local machine: "localhost", "127.0.0.1", the IPv6 loopback
     * "::1" (with or without the brackets a URL requires) and anything
     * containing ".localhost".
     *
     * @param string $url                <p>The URL to check.</p>
     * @param bool   $disallow_localhost [optional] <p>Reject URLs that point to the local machine.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // inside a URL an IPv6 literal is written in square brackets (RFC 3986),
                    // so the loopback address normally shows up in this form
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // everything else is left to PHP's own URL validation
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
4000
4001
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * The string is decoded via "UTF8::json_decode()". A decoded value of null
     * only counts as valid if the input itself spells "null"
     * (case-insensitive).
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if self::$SUPPORT['json'] is false and $str is not empty
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // null is ambiguous: either a decoding failure or the JSON literal itself
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4041
4042
    /**
     * Tells whether a string consists of lowercase characters only.
     *
     * The check is done with the POSIX class "[[:lower:]]"; the pattern allows
     * zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $lower_only = '^[[:lower:]]*$';

        // fallback: no mbstring, use the library's own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $lower_only);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($lower_only, $str);
    }
4059
4060
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with unserialize(). Object instantiation is
     * disabled for that probe ("allowed_classes" => false), so serialized
     * objects are still recognized but no class is loaded and no magic method
     * (e.g. __wakeup() / __destruct()) of a payload is executed.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized boolean false: unserialize() returns false
        // for it as well as for invalid input, so it is checked explicitly.
        if ($str === 'b:0;') {
            return true;
        }

        // The silence operator hides the notice/warning that unserialize()
        // raises for input that is not serialized data.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4082
4083
    /**
     * Tells whether a string consists of upper case characters only.
     *
     * The check is done with the POSIX class "[[:upper:]]"; the pattern allows
     * zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $upper_only = '^[[:upper:]]*$';

        // fallback: no mbstring, use the library's own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $upper_only);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($upper_only, $str);
    }
4103
4104
    /**
     * Check if the string is UTF-16.
     *
     * The input (without BOM) is converted from each UTF-16 byte order to
     * UTF-8 and back again. For every byte order that survives this round
     * trip a score is computed; the byte order with the higher score wins and
     * equal scores mean "not UTF-16".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>Return false right away if
     *                                          "UTF8::is_binary()" (strict) does not classify
     *                                          the input as binary.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // character statistics of the input: computed lazily, at most once,
        // and shared by both byte-order checks
        $str_chars = [];

        $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (['UTF-16LE', 'UTF-16BE'] as $byte_order) {
            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!$test) {
                continue;
            }

            // only a conversion that survives the round trip is a candidate
            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys are needed, so iterate them directly instead of
            // taking references into a temporary array.
            // NOTE(review): this looks up the *keys* of count_chars($test3) in the
            // *values* of $str_chars - kept exactly as before, confirm that this is intended.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$score[$byte_order];
                }
            }
        }

        $maybe_utf16le = $score['UTF-16LE'];
        $maybe_utf16be = $score['UTF-16BE'];

        if ($maybe_utf16be !== $maybe_utf16le) {
            if ($maybe_utf16le > $maybe_utf16be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4197
4198
    /**
     * Check if the string is UTF-32.
     *
     * The input (without BOM) is converted from each UTF-32 byte order to
     * UTF-8 and back again. For every byte order that survives this round
     * trip a score is computed; the byte order with the higher score wins and
     * equal scores mean "not UTF-32".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>Return false right away if
     *                                          "UTF8::is_binary()" (strict) does not classify
     *                                          the input as binary.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // character statistics of the input: computed lazily, at most once,
        // and shared by both byte-order checks
        $str_chars = [];

        $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach (['UTF-32LE', 'UTF-32BE'] as $byte_order) {
            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!$test) {
                continue;
            }

            // only a conversion that survives the round trip is a candidate
            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys are needed, so iterate them directly instead of
            // taking references into a temporary array.
            // NOTE(review): this looks up the *keys* of count_chars($test3) in the
            // *values* of $str_chars - kept exactly as before, confirm that this is intended.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$score[$byte_order];
                }
            }
        }

        $maybe_utf32le = $score['UTF-32LE'];
        $maybe_utf32be = $score['UTF-32BE'];

        if ($maybe_utf32be !== $maybe_utf32le) {
            if ($maybe_utf32le > $maybe_utf32be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4291
4292
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * An array is checked element by element (recursively); the first element
     * that fails makes the whole result false, and an empty array is true.
     * Any other input is cast to string before it is checked.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // the elements are only read, so they are iterated by value
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4322
4323
    /**
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" first; the result is then
     * handed to PHP's native json_decode() together with the other arguments.
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, passed on unchanged
     *                        (e.g. <b>JSON_BIGINT_AS_STRING</b>).
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if self::$SUPPORT['json'] is false
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4377
4378
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" first; the result is then
     * handed to PHP's native json_encode() together with the other arguments.
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options, passed on unchanged
     *                       (e.g. <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>).
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if self::$SUPPORT['json'] is false
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4431
4432
    /**
     * Checks whether JSON is available on the server.
     *
     * The check only looks for the function "json_decode".
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4446
4447
    /**
     * Makes string's first char lowercase.
     *
     * The string is split into its first character and the rest; only the
     * first character is lowercased and the rest is appended unchanged.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // any other charset: normalize its name and use the library's own helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // plain mb_strtolower() is enough unless language rules or length keeping are requested
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $tail;
        }

        $head = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $head . $tail;
    }
4509
4510
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // pure delegation, kept only for backward compatibility
        return self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
4541
4542
    /**
     * Lowercase the first character of all words in the string.
     *
     * The string is split with "UTF8::str_to_words()"; every non-empty part
     * that is not listed in $exceptions is passed through "UTF8::lcfirst()",
     * and the parts are then glued together again. A part that consists of
     * the single character "0" is kept.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // strict comparison: the string "0" is falsy in PHP but it is not empty
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // the parts are only read, so they are iterated by value
        foreach ($words as $word) {
            // skip empty parts only - a part like "0" has to stay in the result
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4595
4596
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::lcfirst()".
     *
     * @param string      $str                           <p>Forwarded to "UTF8::lcfirst()".</p>
     * @param string      $encoding                      [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
     * @param bool        $clean_utf8                    [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
     * @param string|null $lang                          [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever "UTF8::lcfirst()" returns for the given arguments.</p>
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // pure delegation: the legacy camelCase name stays callable without duplicating any logic
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4627
4628
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace ("\s"),
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char-list would build the invalid pattern "^[]+"; the regex call would then fail
        // and the cast below would turn the failure into '' (= the whole input is lost).
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // no delimiter argument: mb_ereg patterns are not wrapped in delimiters
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: same pattern, executed via preg (delimiter "/" must be quoted too)
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4668
4669
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings
     *                                  (array items are concatenated before the lookup).</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point, null if no code point was found (e.g. empty input)
     */
    public static function max($arg)
    {
        // arrays are flattened into one string, so a single code-point scan covers every item
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);
        if ($codepoints !== []) {
            return self::chr((int) \max($codepoints));
        }

        return null;
    }
4695
4696
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The largest entry of "UTF8::chr_size_list()" for the string, 0 if that list is empty.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        // \max() must not be called with an empty array, so the empty case is answered directly
        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4718
4719
    /**
     * Checks whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4733
4734
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <p>A UTF-8 encoded string or an array of such strings
     *                             (array items are concatenated before the lookup).</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, null if no code point was found (e.g. empty input).</p>
     */
    public static function min($arg)
    {
        // arrays are flattened into one string, so a single code-point scan covers every item
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);
        if ($codepoints !== []) {
            return self::chr((int) \min($codepoints));
        }

        return null;
    }
4761
4762
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::normalize_encoding()".
     *
     * @param mixed $encoding <p>Forwarded to "UTF8::normalize_encoding()".</p>
     * @param mixed $fallback <p>Forwarded to "UTF8::normalize_encoding()".</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>Whatever "UTF8::normalize_encoding()" returns for the given arguments.</p>
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4779
4780
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: empty input, hard-coded fast paths for the most common names, the per-request
     * cache, the list of known encodings and finally a table of aliases that is matched against the
     * upper-cased name with all non-alphanumeric characters removed.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default).
     *                      Unknown names are returned upper-cased.</p>
     *
     * @template TNormalizeEncodingFallback
     * @psalm-param string|TNormalizeEncodingFallback $fallback
     * @psalm-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // loose check on purpose: '' and '0' both end up here
        if (!$encoding) {
            return $fallback;
        }

        //
        // fast paths for the most common names (strict, case-sensitive comparison)
        //

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (\in_array($encoding, ['ISO', 'ISO-8859-1'], true)) {
            return 'ISO-8859-1';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known name: return it as given
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // alias lookup: upper-case first, then strip everything that is not a letter or a digit
        $encoding_upper = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding_upper);

        $equivalences = [
            // ISO-8859-1: Western European
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1',
            // ISO-8859-2: Central European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2',
            // ISO-8859-3: Southern European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3',
            // ISO-8859-4: Northern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4',
            // ISO-8859-5: Cyrillic
            'ISO88595'    => 'ISO-8859-5',
            // ISO-8859-6: Arabic
            'ISO88596'    => 'ISO-8859-6',
            // ISO-8859-7: Greek
            'ISO88597'    => 'ISO-8859-7',
            // ISO-8859-8: Hebrew
            'ISO88598'    => 'ISO-8859-8',
            // ISO-8859-9: Turkish
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9',
            // ISO-8859-11: Thai
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11',
            // ISO-8859-10: Nordic
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10',
            // ISO-8859-13: Baltic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13',
            // ISO-8859-14: Celtic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14',
            // ISO-8859-15: Western European (with some extra chars e.g. €)
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15',
            // ISO-8859-16: Southeast European
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16',
            // Windows code pages
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            // Unicode transformation formats
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            // raw bytes
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // no alias found: keep the upper-cased name (every table value is a non-empty string)
        $normalized = $equivalences[$alias_key] ?? $encoding_upper;

        // cached under the name exactly as it was passed in
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4945
4946
    /**
     * Standardize the line endings ("\r\n", "\r" and "\n") of a string, unix-like ("\n") by default.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. An array is handed to "str_replace()" as-is, i.e. its items are
     *                                  mapped by position to "\r\n", "\r" and "\n" and applied one after another.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // array replacer: keep the historical positional "str_replace()" behaviour
        if (\is_array($replacer)) {
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // Single pass, longest match first: already replaced text is never scanned again.
        // A sequential "str_replace()" would re-replace the "\r" / "\n" it has just inserted
        // (e.g. "\r\n" as replacer turned "a\r\nb" into "a\r\r\n\r\nb").
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4962
4963
    /**
     * Normalize some MS Word special characters (thin wrapper around "ASCII::normalize_msword()").
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4979
4980
    /**
     * Normalize the whitespace (thin wrapper around "ASCII::normalize_whitespace()").
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        // all three arguments are handed over unchanged
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
5006
5007
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * Resolution order: per-request cache, the pre-computed "ord" data table, "IntlChar::ord()"
     * (if supported) and finally a manual decode of the first (up to four) bytes.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        // the two names compared here are used as-is, everything else is normalized first
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the key is built from the input char *before* any re-encoding below
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // lazy-load the lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            // a falsy result (null / 0) is not trusted, the manual decode below decides instead
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based array of the first (up to four) byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead_byte = $bytes ? $bytes[1] : 0;

        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // lead byte of a four-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // lead byte of a three-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // lead byte of a two-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or a truncated sequence): the byte value itself
            $code = $lead_byte;
        }

        return $CHAR_CACHE[$cache_key] = $code;
    }
5101
5102
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string or if $result is empty afterwards.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            return $result !== [];
        }

        $is_parsed = \mb_parse_str($str, $result);

        // success needs both: no parser failure and at least one parsed entry
        return $is_parsed !== false && $result !== [];
    }
5143
5144
    /**
     * Checks if the "u" modifier is available that enables Unicode support in PCRE.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // an empty pattern matches the empty string (1); a failure yields false, the warning is silenced
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
5160
5161
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not numeric or not greater than zero
     * @throws \RuntimeException         if $use_ctype is true but "ext-ctype" is not available
     *
     * @return string[]
     *                  <p>The characters of the range, or an empty array if a boundary is empty / resolves to 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // How the boundaries are interpreted is decided once (based on both values) and then reused for $var2.
        // The ctype functions always get strings: an int argument would be read as an ASCII code
        // (and is deprecated since PHP 8.1), which classified e.g. int 71 differently from "71".
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // not a number: treat the value as a character and use its code point
            $start = self::ord((string) $var1);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord((string) $var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5255
5256
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * One decode pass is: "to_utf8()" -> "html_entity_decode()" -> "\rawurldecode()" -> "fix_simple_utf8()".
     * With $multi_decode the pass is repeated until the string does not change anymore.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to decode, only the simple UTF-8 fix is applied
        $needs_decoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needs_decoding = true;

                break;
            }
        }

        if (!$needs_decoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // always one pass; further passes only with $multi_decode and only while a pass still changes the string
        do {
            $str_before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entity_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($entity_decoded));
        } while ($multi_decode && $str_before_pass !== $str);

        return $str;
    }
5332
5333
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The regex is assembled as: delimiter + pattern + delimiter + "u" + options,
     * so the pattern is always matched in PCRE UTF-8 mode.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used; "msr" is mapped to "ms".</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The result of "preg_replace()", cast to string.</p>
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is passed on as "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5368
5369
    /**
     * Deprecated alias: forwards the string unchanged to "UTF8::remove_bom()".
     *
     * @param string $str <p>Forwarded to "UTF8::remove_bom()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever "UTF8::remove_bom()" returns for the given string.</p>
     *
     * @see        UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5385
5386
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM (BOM string => byte length) is checked once, in table order,
     * against the beginning of the (possibly already shortened) string.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // byte length of what is left of the string
        $remaining_length = \strlen($str);

        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // only a BOM at the very beginning counts
            if (\strpos($str, $bom_string) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_byte_length, $remaining_length);
            if ($without_bom === false) {
                return '';
            }

            $remaining_length -= (int) $bom_byte_length;

            $str = (string) $without_bom;
        }

        return $str;
    }
5421
5422
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what is collapsed into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String (or list of strings) to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_array($what)) {
            return $str;
        }

        foreach ($what as $item) {
            // an empty needle has no duplicates to collapse
            if ($item === '') {
                continue;
            }

            // Use the captured group as replacement instead of the raw needle:
            // a needle such as '$2' or '\0' would otherwise be interpreted as a
            // back-reference by preg_replace() and corrupt the result.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
5452
5453
    /**
     * Strip html tags from the string via "strip_tags()".
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, passed on to
     *                               "strip_tags()" as given. Default: ''
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $plain_text = \strip_tags($str, $allowable_tags);

        return $plain_text;
    }
5470
5471
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by exactly one $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // NOTE: the former pattern started with a stray "/" ("/\r\n"), which swallowed a
        // literal slash in front of a line break and let a plain "\r\n" be replaced twice.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
5486
5487
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * The work is delegated to "ASCII::remove_invisible_characters()"; all
     * arguments are passed through unchanged.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>
     *                            Try to remove url encoded control character.
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                            <br>
     *                            Default: false
     *                            </p>
     * @param string $replacement [optional] <p>The replacement character.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        $visible_only = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $visible_only;
    }
5521
5522
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix is detected byte-wise; its length is then measured in
     * characters of the given encoding to cut it off.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare against '' explicitly: a truthiness check would wrongly ignore the prefix "0"
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5559
5560
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix is detected byte-wise; the lengths are then measured in
     * characters of the given encoding to cut it off.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare against '' explicitly: a truthiness check would wrongly ignore the suffix "0"
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5598
5599
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Case-sensitive replacement uses the native "str_replace()", the
     * case-insensitive one is handed over to "UTF8::str_ireplace()".
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5624
5625
    /**
     * Replaces all occurrences of every $search element in $str by $replacement.
     *
     * Case-sensitive replacement uses the native "str_replace()", the
     * case-insensitive one is handed over to "UTF8::str_ireplace()".
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or list of strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5650
5651
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * With $process_invalid_utf8_chars enabled the string is first re-encoded
     * from UTF-8 to UTF-8 while the mbstring substitute character is temporarily
     * switched to the replacement (or to "none" for an empty replacement);
     * the previous substitute character is restored afterwards.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // an empty replacement is expressed as "none" for "mb_substitute_character()"
            $substitute = $replacement_char === '' ? 'none' : $replacement_char;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $previous_substitute = \mb_substitute_character();
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
            @\mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previous_substitute);
        }

        $diamonds = [
            "\xEF\xBF\xBD",
            '�',
        ];
        $replacements = [
            $replacement_char,
            $replacement_char,
        ];

        return \str_replace($diamonds, $replacements, $str);
    }
5709
5710
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. An empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list would build the invalid character class "[]+$";
        // the failing regex call would then be cast to '' and wipe the whole string.
        // Like the native rtrim(), there is simply nothing to strip in that case.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $pattern = '[' . \preg_quote($chars) . ']+$';
            } else {
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without "mbstring": same pattern via PCRE, quoted for the "/" delimiter
        if ($chars !== null) {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
5751
5752
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of
     * "self::$SUPPORT"; the block is echoed when $useEcho is true and is
     * returned in every case.
     *
     * @param bool $useEcho <p>Whether the generated html should also be echoed. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated html.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';

        // iterate by value: nothing is modified, so no reference into the static array is needed
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5779
5780
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is handed back untouched when the caller asked for it
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5813
5814
    /**
     * Replace every run of $tab_length spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that make up one tab. Default: 4<br>
     *                           Values below 1 leave the string unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with tabs instead of space runs.</p>
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // a tab cannot stand for less than one space, and "str_repeat()" rejects negative counts
        if ($tab_length < 1) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tab_length), "\t", $str);
    }
5834
5835
    /**
     * Deprecated alias: forwards the call unchanged to "UTF8::str_split()".
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5857
5858
    /**
     * Deprecated alias: forwards the call unchanged to "UTF8::str_starts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5875
5876
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * Steps: lower-case the first character of the trimmed string, drop leading
     * dashes / underscores, upper-case the character after every run of
     * dashes / underscores / whitespace (removing the run), and finally
     * upper-case every digit run together with the character that follows it.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        $needs_normalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native "mb_" functions are only used when no language / length special-casing is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case one matched chunk, shared by both replace steps below.
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chunk);
            }

            return \mb_strtoupper($chunk, $encoding);
        };

        // separators: drop the run and upper-case the character behind it (if there is one)
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // digits: upper-case the whole match (the digit run plus the following character)
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5969
5970
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace-collapsed string is passed through
     * "UTF8::str_capitalize_name_helper()" twice: first with ' ' and then with '-'.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $space_pass = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_pass, '-');
    }
5991
5992
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * The case-sensitive search uses "strpos()", the insensitive one "mb_stripos()".
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
6017
6018
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle inside the
     * list always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare against '' explicitly: a truthiness check would reject the needle "0"
            if ($needle === '') {
                return false;
            }

            // exactly one lookup per needle, matching the requested case mode
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6057
6058
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles list
     * always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains any of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare against '' explicitly: a truthiness check would skip the needle "0"
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6102
6103
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for "UTF8::str_delimit()" with '-' as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
6119
6120
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * Internally a dash is first put in front of the uppercase characters, the
     * string is lower-cased, and then every run of dashes / underscores /
     * whitespace is swapped for $delimiter. The regex steps use "mb_ereg_replace()"
     * when "mbstring" is supported and "preg_replace()" otherwise.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: mark every uppercase character that is not at a word boundary with a dash
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lower-case, natively when no language / length special-casing is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: replace every separator run with the requested delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6173
6174
    /**
6175
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
6176
     *
6177
     * EXAMPLE: <code>
6178
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
6179
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
6180
     * </code>
6181
     *
6182
     * @param string $str <p>The input string.</p>
6183
     *
6184
     * @psalm-pure
6185
     *
6186
     * @return false|string
6187
     *                      <p>
6188
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
6189
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
6190
     *                      </p>
6191
     */
6192
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        // step 1: binary looking input can only be reported as UTF-32 / UTF-16,
        // everything else that is binary (PDF, images, ...) counts as "not detected"
        if (self::is_binary($input, true)) {
            $utf32_flag = self::is_utf32($input, false);
            if ($utf32_flag === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_flag === 2) {
                return 'UTF-32BE';
            }

            $utf16_flag = self::is_utf16($input, false);
            if ($utf16_flag === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_flag === 2) {
                return 'UTF-16BE';
            }

            return false;
        }

        // step 2: plain ASCII
        if (ASCII::is_ascii($input)) {
            return 'ASCII';
        }

        // step 3: UTF-8
        if (self::is_utf8_string($input)) {
            return 'UTF-8';
        }

        // step 4: let "mb_detect_encoding()" try a fixed list of legacy encodings
        // (it cannot detect UTF-16, UTF-32, UCS2 and UCS4, which is why they are handled above)
        if (self::$SUPPORT['mbstring'] === true) {
            $candidate_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidate_order, true);
            if ($detected) {
                return $detected;
            }
        }

        // step 5: last resort, accept the first encoding that survives an "iconv()" round trip unchanged
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($round_trip === $input) {
                return $candidate;
            }
        }

        return false;
    }
6304
6305
    /**
6306
     * alias for "UTF8::str_ends_with()"
6307
     *
6308
     * @param string $haystack
6309
     * @param string $needle
6310
     *
6311
     * @psalm-pure
6312
     *
6313
     * @return bool
6314
     *
6315
     * @see        UTF8::str_ends_with()
6316
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
6317
     */
6318
    public static function str_ends(string $haystack, string $needle): bool
    {
        // deprecated alias: the arguments are forwarded unchanged
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
6322
6323
    /**
6324
     * Check if the string ends with the given substring.
6325
     *
6326
     * EXAMPLE: <code>
6327
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6328
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
6329
     * </code>
6330
     *
6331
     * @param string $haystack <p>The string to search in.</p>
6332
     * @param string $needle   <p>The substring to search for.</p>
6333
     *
6334
     * @psalm-pure
6335
     *
6336
     * @return bool
6337
     */
6338
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // the empty string is a suffix of every string (also of the empty one)
        if ($needle === '') {
            return true;
        }

        $needle_bytes = \strlen($needle);

        // a needle that is longer than the haystack can never be its suffix,
        // this also covers the empty haystack
        if ($needle_bytes > \strlen($haystack)) {
            return false;
        }

        // binary safe comparison of the last "$needle_bytes" bytes with the needle
        return \substr_compare($haystack, $needle, -$needle_bytes) === 0;
    }
6350
6351
    /**
6352
     * Returns true if the string ends with any of $substrings, false otherwise.
6353
     *
6354
     * - case-sensitive
6355
     *
6356
     * @param string   $str        <p>The input string.</p>
6357
     * @param string[] $substrings <p>Substrings to look for.</p>
6358
     *
6359
     * @psalm-pure
6360
     *
6361
     * @return bool
6362
     *              <p>Whether or not $str ends with $substring.</p>
6363
     */
6364
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // iterate by value: nothing is modified here, so a by-reference loop
        // would only leave a dangling reference behind
        // (an empty "$substrings" simply falls through to "false")
        foreach ($substrings as $substring) {
            // byte-wise comparison of the tail of "$str" with the candidate
            // NOTE: for an empty candidate "substr($str, -0)" is the whole string,
            //       so '' only matches if "$str" itself is empty
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
6378
6379
    /**
6380
     * Ensures that the string begins with $substring. If it doesn't, it's
6381
     * prepended.
6382
     *
6383
     * @param string $str       <p>The input string.</p>
6384
     * @param string $substring <p>The substring to add if not present.</p>
6385
     *
6386
     * @psalm-pure
6387
     *
6388
     * @return string
6389
     */
6390
    public static function str_ensure_left(string $str, string $substring): string
    {
        // the prefix counts as "present" only for a non-empty substring
        // that matches the first bytes of the string
        $already_prefixed = $substring !== ''
                            &&
                            \strncmp($str, $substring, \strlen($substring)) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
6402
6403
    /**
6404
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
6405
     *
6406
     * @param string $str       <p>The input string.</p>
6407
     * @param string $substring <p>The substring to add if not present.</p>
6408
     *
6409
     * @psalm-pure
6410
     *
6411
     * @return string
6412
     */
6413
    public static function str_ensure_right(string $str, string $substring): string
    {
        // only a non-empty string can already end with a non-empty substring
        if ($str !== '' && $substring !== '') {
            $tail = \substr($str, -\strlen($substring));
            if ($tail === $substring) {
                return $str;
            }
        }

        // not present (or one of the inputs is empty): append it
        return $str . $substring;
    }
6427
6428
    /**
6429
     * Capitalizes the first word of the string, replaces underscores with
6430
     * spaces, and strips '_id'.
6431
     *
6432
     * @param string $str
6433
     *
6434
     * @psalm-pure
6435
     *
6436
     * @return string
6437
     */
6438
    public static function str_humanize($str): string
    {
        // first drop every "_id", afterwards turn the remaining underscores into spaces
        $without_id = \str_replace('_id', '', $str);
        $with_spaces = \str_replace('_', ' ', $without_id);

        // trim the result and upper-case its first character
        return self::ucfirst(\trim($with_spaces));
    }
6454
6455
    /**
6456
     * alias for "UTF8::str_istarts_with()"
6457
     *
6458
     * @param string $haystack
6459
     * @param string $needle
6460
     *
6461
     * @psalm-pure
6462
     *
6463
     * @return bool
6464
     *
6465
     * @see        UTF8::str_istarts_with()
6466
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
6467
     */
6468
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // deprecated alias: the arguments are forwarded unchanged
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
6472
6473
    /**
6474
     * alias for "UTF8::str_iends_with()"
6475
     *
6476
     * @param string $haystack
6477
     * @param string $needle
6478
     *
6479
     * @psalm-pure
6480
     *
6481
     * @return bool
6482
     *
6483
     * @see        UTF8::str_iends_with()
6484
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
6485
     */
6486
    public static function str_iends(string $haystack, string $needle): bool
    {
        // deprecated alias: the arguments are forwarded unchanged
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
6490
6491
    /**
6492
     * Check if the string ends with the given substring, case-insensitive.
6493
     *
6494
     * EXAMPLE: <code>
6495
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6496
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
6497
     * </code>
6498
     *
6499
     * @param string $haystack <p>The string to search in.</p>
6500
     * @param string $needle   <p>The substring to search for.</p>
6501
     *
6502
     * @psalm-pure
6503
     *
6504
     * @return bool
6505
     */
6506
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // the empty string is a suffix of every string
        if ($needle === '') {
            return true;
        }

        // nothing (except the empty string) can be a suffix of the empty string
        if ($haystack === '') {
            return false;
        }

        // the tail is cut by the byte length of the needle and then compared case-insensitive
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6518
6519
    /**
6520
     * Returns true if the string ends with any of $substrings, false otherwise.
6521
     *
6522
     * - case-insensitive
6523
     *
6524
     * @param string   $str        <p>The input string.</p>
6525
     * @param string[] $substrings <p>Substrings to look for.</p>
6526
     *
6527
     * @psalm-pure
6528
     *
6529
     * @return bool
6530
     *              <p>Whether or not $str ends with $substring.</p>
6531
     */
6532
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // iterate by value: nothing is modified here, so a by-reference loop
        // would only leave a dangling reference behind
        // (an empty "$substrings" simply falls through to "false")
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6546
6547
    /**
6548
     * Returns the index of the first occurrence of $needle in the string,
6549
     * and false if not found. Accepts an optional offset from which to begin
6550
     * the search.
6551
     *
6552
     * @param string $str      <p>The input string.</p>
6553
     * @param string $needle   <p>Substring to look for.</p>
6554
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6555
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6556
     *
6557
     * @psalm-pure
6558
     *
6559
     * @return false|int
6560
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6561
     *
6562
     * @see        UTF8::stripos()
6563
     * @deprecated <p>please use "UTF8::stripos()"</p>
6564
     */
6565
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // deprecated alias: same arguments in the same order
        return self::stripos($str, $needle, $offset, $encoding);
    }
6578
6579
    /**
6580
     * Returns the index of the last occurrence of $needle in the string,
6581
     * and false if not found. Accepts an optional offset from which to begin
6582
     * the search. Offsets may be negative to count from the last character
6583
     * in the string.
6584
     *
6585
     * @param string $str      <p>The input string.</p>
6586
     * @param string $needle   <p>Substring to look for.</p>
6587
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6588
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6589
     *
6590
     * @psalm-pure
6591
     *
6592
     * @return false|int
6593
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6594
     *
6595
     * @see        UTF8::strripos()
6596
     * @deprecated <p>please use "UTF8::strripos()"</p>
6597
     */
6598
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // deprecated alias: same arguments in the same order
        return self::strripos($str, $needle, $offset, $encoding);
    }
6611
6612
    /**
6613
     * Returns the index of the first occurrence of $needle in the string,
6614
     * and false if not found. Accepts an optional offset from which to begin
6615
     * the search.
6616
     *
6617
     * @param string $str      <p>The input string.</p>
6618
     * @param string $needle   <p>Substring to look for.</p>
6619
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6620
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6621
     *
6622
     * @psalm-pure
6623
     *
6624
     * @return false|int
6625
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6626
     *
6627
     * @see        UTF8::strpos()
6628
     * @deprecated <p>please use "UTF8::strpos()"</p>
6629
     */
6630
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // deprecated alias: same arguments in the same order
        return self::strpos($str, $needle, $offset, $encoding);
    }
6643
6644
    /**
6645
     * Returns the index of the last occurrence of $needle in the string,
6646
     * and false if not found. Accepts an optional offset from which to begin
6647
     * the search. Offsets may be negative to count from the last character
6648
     * in the string.
6649
     *
6650
     * @param string $str      <p>The input string.</p>
6651
     * @param string $needle   <p>Substring to look for.</p>
6652
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6653
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6654
     *
6655
     * @psalm-pure
6656
     *
6657
     * @return false|int
6658
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6659
     *
6660
     * @see        UTF8::strrpos()
6661
     * @deprecated <p>please use "UTF8::strrpos()"</p>
6662
     */
6663
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // deprecated alias: same arguments in the same order
        return self::strrpos($str, $needle, $offset, $encoding);
    }
6676
6677
    /**
6678
     * Inserts $substring into the string at the $index provided.
6679
     *
6680
     * @param string $str       <p>The input string.</p>
6681
     * @param string $substring <p>String to be inserted.</p>
6682
     * @param int    $index     <p>The index at which to insert the substring.</p>
6683
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6684
     *
6685
     * @psalm-pure
6686
     *
6687
     * @return string
6688
     */
6689
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: every length / substring operation goes through the encoding aware helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // fast path for UTF-8 via the "mb_*" functions
        $length = (int) \mb_strlen($str);

        // an index behind the end of the string leaves the input untouched
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
6718
6719
    /**
6720
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6721
     *
6722
     * EXAMPLE: <code>
6723
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6724
     * </code>
6725
     *
6726
     * @see http://php.net/manual/en/function.str-ireplace.php
6727
     *
6728
     * @param string|string[] $search      <p>
6729
     *                                     Every replacement with search array is
6730
     *                                     performed on the result of previous replacement.
6731
     *                                     </p>
6732
     * @param string|string[] $replacement <p>The replacement.</p>
6733
     * @param string|string[] $subject     <p>
6734
     *                                     If subject is an array, then the search and
6735
     *                                     replace is performed with every entry of
6736
     *                                     subject, and the return value is an array as
6737
     *                                     well.
6738
     *                                     </p>
6739
     * @param int             $count       [optional] <p>
6740
     *                                     The number of matched and replaced needles will
6741
     *                                     be returned in count which is passed by
6742
     *                                     reference.
6743
     *                                     </p>
6744
     *
6745
     * @psalm-pure
6746
     *
6747
     * @return string|string[]
6748
     *                         <p>A string or an array of replacements.</p>
6749
     *
6750
     * @template TStrIReplaceSubject
6751
     * @psalm-param TStrIReplaceSubject $subject
6752
     * @psalm-return TStrIReplaceSubject
6753
     */
6754
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // build one case-insensitive, unicode aware pattern per search string
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // an empty needle must never match: this pattern cannot match anything
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // "str_ireplace()" inserts the replacement literally, but "preg_replace()" would
        // expand "$n", "${n}" and "\n" as back-references, so "$" and "\" are escaped first
        $to_literal = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };

        if (\is_array($replacement)) {
            $literal_replacement = \array_map($to_literal, $replacement);
        } else {
            $literal_replacement = $to_literal($replacement);
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $literal_replacement, $subject, -1, $count);

        return $subject;
    }
6776
6777
    /**
6778
     * Replaces $search at the beginning of the string with $replacement (case-insensitive).
6779
     *
6780
     * @param string $str         <p>The input string.</p>
6781
     * @param string $search      <p>The string to search for.</p>
6782
     * @param string $replacement <p>The replacement.</p>
6783
     *
6784
     * @psalm-pure
6785
     *
6786
     * @return string
6787
     *                <p>The string after the replacement.</p>
6788
     */
6789
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // an empty search string: the replacement is appended (historical behaviour, kept as it is)
        if ($search === '') {
            return $str . $replacement;
        }

        // "mb_stripos()" instead of "stripos()": the native function only ignores
        // the case of ASCII letters, so e.g. "ÖL" would never match "öl"
        if (\mb_stripos($str, $search) === 0) {
            // cut by characters, because the matched prefix and "$search" can differ in byte length
            return $replacement . (string) \mb_substr($str, (int) \mb_strlen($search));
        }

        return $str;
    }
6811
6812
    /**
6813
     * Replaces $search at the end of the string with $replacement (case-insensitive).
6814
     *
6815
     * @param string $str         <p>The input string.</p>
6816
     * @param string $search      <p>The string to search for.</p>
6817
     * @param string $replacement <p>The replacement.</p>
6818
     *
6819
     * @psalm-pure
6820
     *
6821
     * @return string
6822
     *                <p>The string after the replacement.</p>
6823
     */
6824
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // an empty search string: the replacement is appended
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = (int) \mb_strlen($str);
        $search_length = (int) \mb_strlen($search);

        // a search string that is longer than the input can never be its ending;
        // without this guard the search offset would become negative, which is
        // an error for offsets outside of the string
        if ($search_length > $str_length) {
            return $str;
        }

        $keep_length = $str_length - $search_length;
        $tail = (string) \mb_substr($str, $keep_length);

        // "mb_stripos()" instead of "stripos()": the native function only ignores
        // the case of ASCII letters; "$tail" has as many characters as "$search"
        if (\mb_stripos($tail, $search) === 0) {
            return (string) \mb_substr($str, 0, $keep_length) . $replacement;
        }

        return $str;
    }
6846
6847
    /**
6848
     * Check if the string starts with the given substring, case-insensitive.
6849
     *
6850
     * EXAMPLE: <code>
6851
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6852
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6853
     * </code>
6854
     *
6855
     * @param string $haystack <p>The string to search in.</p>
6856
     * @param string $needle   <p>The substring to search for.</p>
6857
     *
6858
     * @psalm-pure
6859
     *
6860
     * @return bool
6861
     */
6862
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // the empty string is a prefix of every string
        if ($needle === '') {
            return true;
        }

        // nothing (except the empty string) can be a prefix of the empty string
        if ($haystack === '') {
            return false;
        }

        // a prefix is a case-insensitive match at position 0
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
6874
6875
    /**
6876
     * Returns true if the string begins with any of $substrings, false otherwise.
6877
     *
6878
     * - case-insensitive
6879
     *
6880
     * @param string $str        <p>The input string.</p>
6881
     * @param array  $substrings <p>Substrings to look for.</p>
6882
     *
6883
     * @psalm-pure
6884
     *
6885
     * @return bool
6886
     *              <p>Whether or not $str starts with $substring.</p>
6887
     */
6888
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // an empty input string or an empty list can never produce a match
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified here, so a by-reference loop
        // would only leave a dangling reference behind
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6906
6907
    /**
6908
     * Gets the substring after the first occurrence of a separator (case-insensitive).
6909
     *
6910
     * @param string $str       <p>The input string.</p>
6911
     * @param string $separator <p>The string separator.</p>
6912
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6913
     *
6914
     * @psalm-pure
6915
     *
6916
     * @return string
6917
     */
6918
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // case-insensitive search; the encoding is passed on, so that the offset
        // is counted in the same encoding that is used to cut the string below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // skip the separator itself: start right behind it
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6946
6947
    /**
6948
     * Gets the substring after the last occurrence of a separator (case-insensitive).
6949
     *
6950
     * @param string $str       <p>The input string.</p>
6951
     * @param string $separator <p>The string separator.</p>
6952
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6953
     *
6954
     * @psalm-pure
6955
     *
6956
     * @return string
6957
     */
6958
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // case-insensitive search from the end; the encoding is passed on, so that the
        // offset is counted in the same encoding that is used to cut the string below
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // skip the separator itself: start right behind it
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6986
6987
    /**
6988
     * Gets the substring before the first occurrence of a separator (case-insensitive).
6989
     *
6990
     * @param string $str       <p>The input string.</p>
6991
     * @param string $separator <p>The string separator.</p>
6992
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6993
     *
6994
     * @psalm-pure
6995
     *
6996
     * @return string
6997
     */
6998
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // case-insensitive search; the encoding is passed on, so that the offset
        // is counted in the same encoding that is used to cut the string below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // everything in front of the separator
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7018
7019
    /**
7020
     * Gets the substring before the last occurrence of a separator (case-insensitive).
7021
     *
7022
     * @param string $str       <p>The input string.</p>
7023
     * @param string $separator <p>The string separator.</p>
7024
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7025
     *
7026
     * @psalm-pure
7027
     *
7028
     * @return string
7029
     */
7030
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the last case-insensitive occurrence of the separator
        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        // everything in front of that occurrence
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7055
7056
    /**
7057
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
7058
     *
7059
     * @param string $str           <p>The input string.</p>
7060
     * @param string $needle        <p>The string to look for.</p>
7061
     * @param bool   $before_needle [optional] <p>Default: false</p>
7062
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7063
     *
7064
     * @psalm-pure
7065
     *
7066
     * @return string
7067
     */
7068
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $before_needle, $encoding);

        // "false" (= needle not found) is reported as an empty string
        return $part === false ? '' : $part;
    }
7094
7095
    /**
7096
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
7097
     *
7098
     * @param string $str           <p>The input string.</p>
7099
     * @param string $needle        <p>The string to look for.</p>
7100
     * @param bool   $before_needle [optional] <p>Default: false</p>
7101
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7102
     *
7103
     * @psalm-pure
7104
     *
7105
     * @return string
7106
     */
7107
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        // "false" (= needle not found) is reported as an empty string
        return $part === false ? '' : $part;
    }
7133
7134
    /**
7135
     * Returns the last $n characters of the string.
7136
     *
7137
     * @param string $str      <p>The input string.</p>
7138
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
7139
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7140
     *
7141
     * @psalm-pure
7142
     *
7143
     * @return string
7144
     */
7145
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to return for an empty string or a non-positive amount of characters
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // a negative start counts the characters from the end of the string
        return (string) \mb_substr($str, -$n);
    }
7162
7163
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged. Otherwise the
     * string is cut so that the kept part plus $str_add_on is $length characters long.
     * If $str_add_on alone is as long as (or longer than) $length, nothing of the
     * input is kept and only $str_add_on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The (possibly shortened) string; '' for an empty input or a $length <= 0.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Clamp at 0: a negative length would make mb_substr() drop characters
            // from the end instead of keeping none, producing an over-long result.
            $keep = (int) \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Same clamp as in the UTF-8 fast path above.
        $keep = (int) \max(0, $length - (int) self::strlen($str_add_on));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7202
7203
    /**
     * Limit the number of characters in a string, cutting at a space where possible.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * A string that already fits into $length is returned unchanged. If the character
     * at position $length - 1 is a space, the string is cut right before it. Otherwise
     * the first $length characters are taken and everything after their last space is
     * dropped; when they contain no space (or nothing is left), the first $length - 1
     * characters are used. $str_add_on is appended whenever the string was shortened.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The (possibly shortened) string; '' for an empty input or a $length <= 0.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $last_index = $length - 1;

        if ($encoding === 'UTF-8') {
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit falls exactly on a space: cut right before it.
            if (\mb_substr($str, $last_index, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $last_index)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);

            // A limit of -1 makes explode() drop the last (possibly cut) piece.
            $whole_words = \implode(' ', \explode(' ', $truncated, -1));

            if ($whole_words === '') {
                return ((string) \mb_substr($truncated, 0, $last_index)) . $str_add_on;
            }

            return $whole_words . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right before it.
        if (self::substr($str, $last_index, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $last_index, $encoding)) . $str_add_on;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return $str_add_on;
        }

        // A limit of -1 makes explode() drop the last (possibly cut) piece.
        $whole_words = \implode(' ', \explode(' ', $truncated, -1));

        if ($whole_words === '') {
            return ((string) self::substr($truncated, 0, $last_index, $encoding)) . $str_add_on;
        }

        return $whole_words . $str_add_on;
    }
7271
7272
    /**
     * Returns the longest common prefix of $str1 and $str2.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first position where they differ.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared prefix, or '' if the strings have none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // The prefix cannot be longer than the shorter of the two strings.
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );

            $index = 0;
            while ($index < $limit) {
                $char = \mb_substr($str1, $index, 1);

                if ($char === false || $char !== \mb_substr($str2, $index, 1)) {
                    break;
                }

                $prefix .= $char;
                ++$index;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The prefix cannot be longer than the shorter of the two strings.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $index = 0;
        while ($index < $limit) {
            $char = self::substr($str1, $index, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, $index, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$index;
        }

        return $prefix;
    }
7335
7336
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * Uses the dynamic-programming approach described at
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or '' if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated after the normalization above, so an alias that normalizes
        // to 'UTF-8' also takes the mb_substr() path.
        $is_utf8 = $encoding === 'UTF-8';

        // Extract every character exactly once instead of once per table cell.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $is_utf8
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        // $len: length of the best match so far, $end: position in $str1 right after it.
        $len = 0;
        $end = 0;

        // Each cell only depends on the previous row, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $str_chars[$i - 1];
            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    // Extend the match that ended at the previous character of both strings.
                    $run = $previous_row[$j - 1] + 1;

                    // Strict ">" keeps the first match found in case of a tie.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7426
7427
    /**
     * Returns the longest common suffix of $str1 and $str2.
     *
     * Both strings are compared character by character from the end; the
     * comparison stops at the first position where they differ.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared suffix, or '' if the strings have none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // The suffix cannot be longer than the shorter of the two strings.
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Negative offsets address characters counted from the end.
            for ($offset = -1; $offset >= -$limit; --$offset) {
                $char = \mb_substr($str1, $offset, 1);

                if ($char === false || $char !== \mb_substr($str2, $offset, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The suffix cannot be longer than the shorter of the two strings.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // Negative offsets address characters counted from the end.
        for ($offset = -1; $offset >= -$limit; --$offset) {
            $char = self::substr($str1, $offset, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, $offset, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
        }

        return $suffix;
    }
7493
7494
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is passed without delimiters and is always evaluated with the
     * "u" (UTF-8) modifier. A pattern may contain "/" in escaped ("\/") or
     * unescaped form.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $delimiter = '/';

        // An unescaped "/" inside the pattern would end a "/"-delimited regex
        // early and make preg_match() fail, so switch to a delimiter that does
        // not occur in the pattern. An already escaped "\/" still matches a
        // literal slash with any other delimiter.
        if (\strpos($pattern, '/') !== false) {
            foreach (['#', '~', '%', '!', '@', ';', ','] as $candidate) {
                if (\strpos($pattern, $candidate) === false) {
                    $delimiter = $candidate;

                    break;
                }
            }
        }

        return (bool) \preg_match($delimiter . $pattern . $delimiter . 'u', $str);
    }
7509
7510
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // An offset of -k addresses the k-th character from the end and therefore
        // needs at least k characters; a non-negative offset must be below the length.
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
7535
7536
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the requested encoding, i.e. the same encoding
        // that is used below to fetch the character.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7568
7569
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * The input is returned unchanged if $pad_length is 0, if $pad_string is empty
     * or if the input is already longer than $pad_length. When padding on both
     * sides with an odd number of missing characters, the extra character goes to
     * the right.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right' or 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_string === '' || $pad_length === 0) {
            return $str;
        }

        // Anything that is not already an integer must be one of the string aliases.
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        if ($encoding === 'UTF-8') {
            $missing = $pad_length - (int) \mb_strlen($str);

            // The string is already longer than requested.
            if ($missing < 0) {
                return $str;
            }

            if ($pad_type === \STR_PAD_BOTH) {
                $left_length = (int) \floor($missing / 2);
                $right_length = (int) \ceil($missing / 2);

                $left = (string) \mb_substr(
                    \str_repeat($pad_string, $left_length),
                    0,
                    $left_length
                );
                $right = (string) \mb_substr(
                    \str_repeat($pad_string, $right_length),
                    0,
                    $right_length
                );

                return $left . $str . $right;
            }

            // Repeat the pad string often enough to cover the gap, then trim the overshoot.
            $repetitions = (int) \ceil($missing / (int) \mb_strlen($pad_string));
            $fill = (string) \mb_substr(
                \str_repeat($pad_string, $repetitions),
                0,
                $missing
            );

            // Every integer other than STR_PAD_LEFT / STR_PAD_BOTH pads on the right.
            return $pad_type === \STR_PAD_LEFT ? $fill . $str : $str . $fill;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $missing = $pad_length - (int) self::strlen($str, $encoding);

        // The string is already longer than requested.
        if ($missing < 0) {
            return $str;
        }

        if ($pad_type === \STR_PAD_BOTH) {
            $left_length = (int) \floor($missing / 2);
            $right_length = (int) \ceil($missing / 2);

            $left = (string) self::substr(
                \str_repeat($pad_string, $left_length),
                0,
                $left_length,
                $encoding
            );
            $right = (string) self::substr(
                \str_repeat($pad_string, $right_length),
                0,
                $right_length,
                $encoding
            );

            return $left . $str . $right;
        }

        // Repeat the pad string often enough to cover the gap, then trim the overshoot.
        $repetitions = (int) \ceil($missing / (int) self::strlen($pad_string, $encoding));
        $fill = (string) self::substr(
            \str_repeat($pad_string, $repetitions),
            0,
            $missing,
            $encoding
        );

        // Every integer other than STR_PAD_LEFT / STR_PAD_BOTH pads on the right.
        return $pad_type === \STR_PAD_LEFT ? $fill . $str : $str . $fill;
    }
7736
7737
    /**
     * Pads both sides of the string up to the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7765
7766
    /**
     * Pads the beginning of the string up to the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7794
7795
    /**
     * Pads the end of the string up to the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7823
7824
    /**
     * Repeat a string.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7853
7854
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @psalm-param TStrReplaceSubject $subject
     * @psalm-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // Plain delegation; $count is handed through by reference.
        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7913
7914
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * If $str does not start with $search, $str is returned unchanged.
     * With an empty $search the $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // NOTE(review): an empty $search appends (not prepends) the replacement,
        // same as in "str_replace_ending()" - confirm this is intended.
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // Byte-wise comparison of the first bytes of $str against $search.
        if (\strncmp($str, $search, $search_length) === 0) {
            return $replacement . \substr($str, $search_length);
        }

        return $str;
    }
7951
7952
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * If $str does not end with $search (including the case that $search is
     * longer than $str), $str is returned unchanged.
     * With an empty $search the $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte offset at which $search has to start in order to be the ending of $str.
        $offset = \strlen($str) - \strlen($search);

        // A negative offset means $search is longer than $str and cannot match;
        // it must not be handed to a string function, because an offset outside
        // of the haystack raises an error.
        if ($offset >= 0 && \substr_compare($str, $search, $offset) === 0) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
7989
7990
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strpos($subject, $search);

        // Strict check: a match at position 0 must not be mistaken for "not found".
        if ($position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, (int) self::strlen($search));
    }
8024
8025
    /**
     * Replace the last occurrence of "$search" in "$subject" with "$replace".
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strrpos($subject, $search);

        // nothing to replace
        if ($position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $position,
            (int) self::strlen($search)
        );
    }
8058
8059
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Decide once which implementation is used: the plain "mb_*" functions
        // for the exact string 'UTF-8', the class helpers for everything else.
        $use_native_mb = $encoding === 'UTF-8';

        if ($use_native_mb) {
            $char_count = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $char_count = (int) self::strlen($str, $encoding);
        }

        // character positions in random order
        $positions = \range(0, $char_count - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $result = '';
        foreach ($positions as $position) {
            if ($use_native_mb) {
                $char = \mb_substr($str, $position, 1);
            } else {
                $char = self::substr($str, $position, 1, $encoding);
            }

            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
8108
8109
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * An empty string is returned when $end does not lie behind $start, this
     * includes a negative $end that resolves to a position before $start.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> may be returned by the underlying
     *                      substr implementation.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding === 'UTF-8') {
            if ($end === null) {
                $length = (int) \mb_strlen($str);
            } elseif ($end >= 0 && $end <= $start) {
                return '';
            } elseif ($end < 0) {
                $length = (int) \mb_strlen($str) + $end - $start;

                // A negative length would be interpreted by "mb_substr" as
                // "stop N characters before the end" and could return
                // characters although $end lies before $start.
                if ($length < 0) {
                    return '';
                }
            } else {
                $length = $end - $start;
            }

            return \mb_substr($str, $start, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($end === null) {
            $length = (int) self::strlen($str, $encoding);
        } elseif ($end >= 0 && $end <= $start) {
            return '';
        } elseif ($end < 0) {
            $length = (int) self::strlen($str, $encoding) + $end - $start;

            // same guard as in the UTF-8 branch above
            if ($length < 0) {
                return '';
            }
        } else {
            $length = $end - $start;
        }

        return self::substr($str, $start, $length, $encoding);
    }
8160
8161
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every number character and every
     * upper-case letter gets a leading "_" (upper-case letters are lower-cased,
     * ASCII digits are additionally followed by "_"), repeated "_" are
     * collapsed and leading / trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // NOTE: no "|" between the two properties, inside of a character
            // class it would be a literal pipe and not an alternation.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // ASCII digit: surround it with "_"
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8230
8231
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves exactly one entry per distinct value
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8261
8262
    /**
     * Split every element of the given array into an array of Unicode characters.
     *
     * The keys of the input array are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        /** @var string[][] $chunks */
        $chunks = [];

        foreach ($input as $key => $value) {
            $chunks[$key] = self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        return $chunks;
    }
8300
8301
    /**
     * Convert a string to an array of unicode characters.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * Three strategies are used, in this order: "mbstring" (if allowed via
     * $try_to_use_mb_functions and available), PCRE with UTF-8 support and
     * finally a manual byte-wise UTF-8 decoder. A non-positive $length and an
     * empty input both result in an empty array.
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        // init
        $input = (string) $input;

        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if (
            $try_to_use_mb_functions
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $return = \mb_str_split($input, $length);
                if ($return !== false) {
                    return $return;
                }
            }

            // "mb_str_split" is not available (or failed): split into single
            // characters here, chunking by $length is done at the end
            $i_max = \mb_strlen($input);
            if ($i_max <= 127) {
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($input, $i, 1);
                }
            } else {
                $return_array = [];
                \preg_match_all('/./us', $input, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $input, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: decode the UTF-8 byte sequences by hand, bytes that do
            // not form a complete 1-4 byte sequence are skipped

            $ret = [];
            $len = \strlen($input);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($input[$i] & "\x80") === "\x00") {
                    // 1 byte (ASCII)
                    $ret[] = $input[$i];
                } elseif (
                    isset($input[$i + 1])
                    &&
                    ($input[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 byte sequence
                    if (($input[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $input[$i] . $input[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($input[$i + 2])
                    &&
                    ($input[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($input[$i + 3])
                    &&
                    ($input[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            // The callback takes its argument by value: "array_map" does not
            // pass references, and a by-reference parameter here may raise a
            // "must be passed by reference" warning on PHP 8.
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
8461
8462
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // negative limit === no limit
            if ($limit < 0) {
                return $parts;
            }

            // keep only the first $limit parts
            $limited_parts = [];
            foreach ($parts as $part) {
                if (\count($limited_parts) === $limit) {
                    break;
                }

                $limited_parts[] = $part;
            }

            return $limited_parts;
        }

        // Ask for one part more than requested: the surplus part holds the
        // unsplit rest of the string and is dropped again below.
        $preg_limit = $limit > 0 ? $limit + 1 : -1;

        $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $preg_limit);

        if ($parts === false) {
            return [];
        }

        if ($preg_limit > 0 && \count($parts) === $preg_limit) {
            \array_pop($parts);
        }

        return $parts;
    }
8526
8527
    /**
     * Check if the string starts with the given substring.
     *
     * The check is case-sensitive, an empty $needle always matches.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // binary-safe comparison of the leading bytes only
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
8554
8555
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
8586
8587
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class helper (like the other "str_substr_*_separator"
        // methods) instead of the raw "mb_substr", so that the encoding is
        // handled the same way as in the "self::strpos" / "self::strlen" calls.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8628
8629
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            return (string) self::substr(
                $str,
                $position + (int) self::strlen($separator, $encoding),
                null,
                $encoding
            );
        }

        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // everything behind the separator
        $start = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }
8673
8674
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            return (string) self::substr($str, 0, $position, $encoding);
        }

        $position = \mb_strpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // everything in front of the separator
        return (string) \mb_substr($str, 0, $position);
    }
8719
8720
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // the encoding is normalized only for the final "substr" call
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $position, $encoding);
        }

        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // everything in front of the last separator
        return (string) \mb_substr($str, 0, $position);
    }
8764
8765
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the needle itself. An
     * empty string is returned if $str or $needle is empty, or if the needle
     * is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // "false" is also the default of "mb_strstr", so the flag can be
            // passed through as it is
            $part = \mb_strstr($str, $needle, $before_needle);
        } else {
            $part = self::strstr($str, $needle, $before_needle, $encoding);
        }

        if ($part === false) {
            return '';
        }

        return $part;
    }
8811
8812
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the needle itself. An
     * empty string is returned if $str or $needle is empty, or if the needle
     * is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // "false" is also the default of "mb_strrchr", so the flag can be
            // passed through as it is
            $part = \mb_strrchr($str, $needle, $before_needle);
        } else {
            $part = self::strrchr($str, $needle, $before_needle, $encoding);
        }

        if ($part === false) {
            return '';
        }

        return $part;
    }
8858
8859
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
8874
8875
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of each word lower-cased. The input is trimmed first unless
     * $use_trim_first is disabled. Also accepts an array, $ignore, allowing you
     * to list words that are returned untouched.
     *
     * EXAMPLE: <code>UTF8::str_titleize('the quick fox', ['quick']); // 'The quick Fox'</code>
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that, in addition to
     *                                                           whitespace, separate words from each other.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain mbstring functions are only used when no language / length special-casing was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare explicitly against null and '': a plain truthiness check would
        // silently ignore the perfectly valid separator "0".
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // a "word" is every run of chars that is neither whitespace nor one of the extra separator chars
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are returned exactly as they were found
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // language-aware path: lower-case the whole word, then upper-case its first char
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8972
8973
    /**
     * Convert a string into an obfuscated string: a randomly picked share of
     * its chars is replaced by $obfuscateChar.
     *
     * EXAMPLE: <code>
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the chars that is picked for obfuscation, e.g. 0.5 for 50%.
     *                                Values above the number of changeable chars are capped.</p>
     * @param string   $obfuscateChar <p>The replacement char.</p>
     * @param string[] $keepChars     <p>Chars that are never replaced (they still count as "picked").</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // temporarily swap pre-existing obfuscate chars for a helper char, they are restored at the end
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // Only chars that differ from the obfuscate char can be picked by the loop
        // below, so the requested amount is capped at that number. Without the cap
        // (e.g. $percent > 1) the target could never be reached and the loop would
        // spin forever.
        $charsChangeable = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsChangeable;
            }
        }
        $charsMaxChange = \min($charsChangeable, \round($charsMax * $percent));

        $charsCounter = 0;
        $charKeyDone = [];

        // keep sweeping over the chars until enough of them were picked
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // random skip, so the picked positions differ between calls
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // protected chars count as picked but stay readable
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
9038
9039
    /**
     * Returns a trimmed string in proper title case.
     *
     * Small words (a, an, and, ...) stay lower-case unless they start or end
     * the title or a sub-phrase, or are part of a hyphenated compound. URLs,
     * domains, e-mail addresses and file paths as well as words with internal
     * capitals (e.g. "iPhone") are left untouched.
     *
     * Also accepts an array, $ignore, whose entries are added to the list of
     * small words.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The entries are inserted into the
     *                         regular expressions as they are.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex fragments of the words that normally stay lower-case, plus the caller's additions
        $small_words = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        // upper-cases the first char of capture group 1
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        // keeps capture group 1 and upper-cases the first char of capture group 2
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        $str = \trim($str);

        // an input without any lower-case char is lower-cased first, so that it can be re-capitalized below
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // URLs, domains, emails and file paths are preserved
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // words without internal caps get a capital first letter
                    $word = static::ucfirst($matches[4], $encoding);
                } else {
                    // every other kind of word is preserved (iPhone)
                    $word = $matches[5];
                }

                // leading and trailing underscores are preserved
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in this pattern checks for an ellipsis char while its
        // inline comment talks about a hyphen - confirm which one is intended.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        return $str;
    }
9230
9231
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as binary digits (8 per byte) and
     * leading zero bits of the whole result are stripped; an empty string or a
     * string consisting of NUL bytes only results in '0'.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $bytes - needed for PhpStan (stubs error) */
        $bytes = \unpack('C*', $str);
        if ($bytes === false) {
            return false;
        }

        // Build the digits byte by byte: converting the whole hex dump at once via
        // base_convert() loses precision as soon as the value no longer fits into a
        // float, i.e. for every input longer than a few bytes.
        $binary = '';
        foreach ($bytes as $byte) {
            $binary .= \sprintf('%08b', $byte);
        }

        // same output format as before: no leading zero bits, but never an empty result
        $binary = \ltrim($binary, '0');

        return $binary === '' ? '0' : $binary;
    }
9254
9255
    /**
     * Split a string into its lines. "\r\n", "\r" and "\n" are each accepted
     * as one line break, so blank lines are kept as empty strings unless
     * $remove_empty_values is set.
     *
     * EXAMPLE: <code>UTF8::str_to_lines("a\n\nb"); // array('a', '', 'b')</code>
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // "\r\n" has to come first in the alternation so that it is consumed as ONE line break;
        // a char class like "[\r\n]{1,2}" would also swallow two consecutive "\n" (= a blank line).
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $return = \mb_split("\r\n|\r|\n", $str);
        } else {
            $return = \preg_split("/\r\n|\r|\n/u", $str);
        }

        if ($return === false) {
            return $remove_empty_values ? [] : [''];
        }

        // nothing to filter, hand the raw lines back
        if (
            $remove_short_values === null
            &&
            !$remove_empty_values
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $remove_empty_values,
            $remove_short_values
        );
    }
9295
9296
    /**
     * Convert a string into an array of words.
     *
     * The result alternates between the text found between words (even
     * indexes, possibly empty) and the words themselves (odd indexes), as long
     * as no filtering via $remove_empty_values / $remove_short_values is
     * requested.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // regex char class for "word chars": letters plus the caller supplied extras
        $char_list = self::rxClass($char_list, '\pL');

        // the capturing group makes preg_split() return the words (delimiters) as well
        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $remove_empty_values ? [] : [''];
        }

        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining entry really is a string
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
9347
9348
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are handed over to "UTF8::to_ascii()" unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9369
9370
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If no room is left for any char of the input (the length is not positive,
     * or not bigger than the length of the substring), only the substring is
     * returned.
     *
     * EXAMPLE: <code>UTF8::str_truncate('Test foo bar', 11, '...'); // 'Test foo...'</code>
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough already, nothing to truncate
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve the room that is needed for the appended substring
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                // a negative length would make mb_substr() cut relative to the END of
                // the string and return far more than the requested length
                return $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // same guard as in the UTF-8 path above
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9430
9431
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * Only the substring is returned when the input is empty, when the length
     * is not positive or when the substring leaves no room for the input.
     *
     * EXAMPLE: <code>UTF8::str_truncate_safe('Test foo bar', 7); // 'Test'</code>
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>true === a truncated text without any
     *                                                       space is kept as it is, instead of being dropped.
     *                                                       Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // need to further trim the string so we can append the substring
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $head - needed for PhpStan (stubs error) */
            $head = \mb_substr($str, 0, $length);
            if ($head === false) {
                return '';
            }

            // a space right behind the cut means that no word was split
            $next_space = \mb_strpos($str, ' ', $length - 1);
            if ($next_space !== $length) {
                // find pos of the last occurrence of a space, get up to that
                $last_space = \mb_strrpos($head, ' ', 0);

                $cut_back = $last_space !== false
                            || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);
                if ($cut_back) {
                    $head = (string) \mb_substr($head, 0, (int) $last_space);
                }
            }

            return $head . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // a space right behind the cut means that no word was split
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space !== $length) {
            // find pos of the last occurrence of a space, get up to that
            $last_space = self::strrpos($head, ' ', 0, $encoding);

            $cut_back = $last_space !== false
                        || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);
            if ($cut_back) {
                $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
            }
        }

        return $head . $substring;
    }
9537
9538
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * The work is delegated to "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
9555
9556
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized via "UTF8::str_camelize()" (which only receives
     * $str and $encoding) and its first char is upper-cased afterwards via
     * "UTF8::ucfirst()" (which receives all of the options).
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9583
9584
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are handed over to "UTF8::ucfirst()" unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
9615
9616
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     *                          <p>Every other value is handled like 0.</p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the words themselves (format 1 and 2).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // odd indexes hold the words, even indexes hold the text between them
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        if ($format === 1) {
            $words = [];
            for ($index = 1; $index < $parts_count; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        if ($format === 2) {
            $words = [];
            // the first word starts behind the leading non-word text
            $offset = (int) self::strlen($parts[0]);
            for ($index = 1; $index < $parts_count; $index += 2) {
                $word = $parts[$index];
                $words[$offset] = $word;
                // skip the word itself and the separator text that follows it
                $offset += (int) self::strlen($word) + (int) self::strlen($parts[$index + 1]);
            }

            return $words;
        }

        return (int) (($parts_count - 1) / 2);
    }
9673
9674
    /**
     * Compare two strings while ignoring case differences.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands first, then delegate to the case-sensitive comparison.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
9716
9717
    /**
     * Alias for "UTF8::strstr()": every argument is forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
9748
9749
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are brought into Unicode normalization form D before the
     * byte-wise comparison. If a string cannot be normalized, it is compared
     * as given.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // Identical byte sequences are equal; no normalization needed.
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() reports failure with false; never hand that
        // to \strcmp(), fall back to the raw input instead.
        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }
9775
9776
    /**
     * Find the length of the initial segment that contains none of the given chars.
     *
     * @param string   $str
     * @param string   $char_list
     * @param int      $offset
     * @param int|null $length
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Without a mask nothing can stop the segment: it spans the whole string.
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // Restrict the search window when an offset and/or a length was requested.
        $needs_slice = $offset || $length !== null;
        if ($needs_slice) {
            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $slice = \mb_substr($str, $offset);
            } else {
                $slice = \mb_substr($str, $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // Lazily capture everything in front of the first char from the mask.
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // No char of the mask occurs: the segment is the whole (sliced) string.
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9839
9840
    /**
     * Alias for "UTF8::stristr()": every argument is forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
9871
9872
    /**
     * Build a UTF-8 string out of code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>A single code point or a list of code points;
     *                                            every value is cast to int before it is used.</p>
     *
     * @psalm-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // Treat a single value like a one-element list.
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // Spell every code point as a numeric HTML entity ...
        $entities = '';
        foreach ($code_points as $code_point) {
            $entities .= '&#' . (int) $code_point . ';';
        }

        // ... and let the entity decoder turn them into characters.
        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
9905
9906
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys of the BOM table are compared against the string. Iterate
        // them by value: a by-reference loop over the static table would leave a
        // dangling reference to its last entry behind.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            if (\strpos($str, (string) $bom_string) === 0) {
                return true;
            }
        }

        return false;
    }
9932
9933
    /**
     * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        // Only hand over the allow-list when the caller actually supplied one.
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9977
9978
    /**
     * Remove every whitespace character from the string. This covers tabs and
     * newlines, as well as multibyte whitespace such as the thin space and the
     * ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Runs of whitespace are dropped in one go; the "u" modifier makes the
        // pattern operate on UTF-8 characters instead of single bytes.
        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        return (string) $stripped;
    }
9999
10000
    /**
     * Find the position of the first occurrence of a substring in a string, ignoring case.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // "grapheme_stripos()" can't handle other encodings or a negative offset
        $can_use_intl = $encoding === 'UTF-8'
                        && $offset >= 0
                        && self::$SUPPORT['intl'] === true;
        if ($can_use_intl) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
10081
10082
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     * The search ignores case.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare strictly against '' here:
        // a loose truthiness check would also treat the valid needle "0" as empty.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // Skip the captured prefix; the remainder starts with the needle.
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
10179
10180
    /**
     * Get the string length in characters, not the byte-length!
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        // Evaluated after the encoding name has been normalized.
        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if (!$is_utf8) {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
                return @\mb_strlen($str, $encoding);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return @\mb_strlen($str);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        $no_multibyte_support = self::$SUPPORT['mbstring'] === false
                                && self::$SUPPORT['iconv'] === false;
        if (!$is_utf8 && $no_multibyte_support) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the characters matched one by one
        //

        \preg_match_all('/./us', $str, $parts);
        $length = \count($parts[0]);

        // The string is known to be non-empty here, so zero matches is reported as failure.
        return $length === 0 ? false : $length;
    }
10310
10311
    /**
     * Get the length of a string in bytes.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset;
        // otherwise the native function is called directly.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
10333
10334
    /**
     * Compare two strings with a "natural order" algorithm while ignoring case.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * EXAMPLES: <code>
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands first, then delegate to the natural-order comparison.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
10365
10366
    /**
     * Compare two strings with a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            // Fold both operands before handing them to the native comparison.
            $folded1 = (string) self::strtonatfold($str1);
            $folded2 = (string) self::strtonatfold($str2);

            return \strnatcmp($folded1, $folded2);
        }

        // Identical byte sequences are equal; no folding needed.
        return 0;
    }
10402
10403
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands, then compare their first $len characters.
        // The encoding is forwarded as well, so the truncation inside strncmp()
        // works with the same charset that was used for folding.
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            $encoding
        );
    }
10436
10437
    /**
     * Compare the first n characters of two strings.
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both strings down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare what is left.
        return self::strcmp($head1, $head2);
    }
10478
10479
    /**
     * Search a string for any char out of a set of characters.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found.</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // Locate the matched text by its byte position and return the tail from there.
        $byte_offset = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $byte_offset);
    }
10506
10507
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native byte functions (CP850 / ASCII only), intl, iconv,
     * native byte functions for pure ASCII input and finally a plain PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.<br>
     *                               A negative offset is counted from the end of the string.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // Translate the negative offset into the equivalent absolute character position,
            // so that the result below stays relative to the start of the full haystack
            // (and not relative to the tail that is cut off for the search).
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        // only search in the part of the haystack that starts at the offset
        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside the shortened haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
10666
10667
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // overloading is not active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
10699
10700
    /**
     * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // overloading is not active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \stripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return \mb_stripos($haystack, $needle, $offset, 'CP850');
    }
10732
10733
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * INFO: the iconv and the plain PHP fallback (used without mbstring) only search
     *       for the first character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte_charset = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte_charset && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both remaining fallbacks only look for the first character of the needle
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10863
10864
    /**
     * Reverses characters order in the string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * The string is passed through UTF8::emoji_encode() before and through
     * UTF8::emoji_decode() after the reversal.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);

        // collect the characters from the last one down to the first one
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($idx = (int) self::strlen($str, $encoding) - 1; $idx >= 0; --$idx) {
                $char = self::substr($str, $idx, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($str) - 1; $idx >= 0; --$idx) {
                $char = \grapheme_substr($str, $idx, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($idx = (int) \mb_strlen($str) - 1; $idx >= 0; --$idx) {
                $char = \mb_substr($str, $idx, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10921
10922
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * INFO: the plain PHP fallback (used without mbstring) only searches
     *       for the first character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is used for the lookup
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
11002
11003
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native byte functions (CP850 / ASCII only), intl,
     * native byte functions for pure ASCII input and finally
     * UTF8::strrpos() on case-folded copies of both strings.
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $is_code_point = (int) $needle === $needle && $needle >= 0;
        if ($is_code_point) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, then run the case-sensitive search on them
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
11120
11121
    /**
     * Finds the byte position of the last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if haystack or
     *                   needle is empty.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // overloading is not active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
11155
11156
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native byte functions (CP850 / ASCII only), intl,
     * native byte functions for pure ASCII input and finally a plain PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset > 0) {
            // skip the first $offset characters, the result is shifted back at the end
            $haystack = (string) self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // A negative offset only limits where a match may START: the last permitted
            // start position is "length + offset". So the haystack must keep needle-length
            // characters beyond that position, otherwise a match starting right there is lost.
            $haystack_length = (int) self::strlen($haystack, $encoding);
            $keep = $haystack_length + $offset + (int) self::strlen($needle, $encoding);
            if ($keep <= 0) {
                // the offset points before the start of the haystack, nothing can match
                return false;
            }

            if ($keep < $haystack_length) {
                $haystack = (string) self::substr($haystack, 0, $keep, $encoding);
            }

            $offset = 0;
        }

        // byte position of the last occurrence inside the (maybe shortened) haystack
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // convert the byte position into a character position of the requested encoding
        return $offset + (int) self::strlen($str_tmp, $encoding);
    }
11301
11302
    /**
     * Find the byte position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // overloading is not active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
11336
11337
    /**
     * Measure the leading run of a string that is made up only of characters from a given mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // '3'</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The characters that are allowed in the measured segment.</p>
     * @param int      $offset   [optional] <p>If non-zero, only the part of $str starting here is examined.</p>
     * @param int|null $length   [optional] <p>If not null, only this many characters of $str are examined.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Length of the matching segment (via UTF8::strlen()), or 0 if nothing matches
     *                   or if the (sliced) string or the mask is empty.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut the input down to the requested window before matching.
        $needs_slice = $offset || $length !== null;
        if ($needs_slice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // Anchor a character class built from the mask at the start of the string.
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11384
11385
    /**
     * Return the part of haystack that starts at (or, optionally, ends before) the first occurrence of needle.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * The implementation is tried in this order: mbstring, plain strstr() for "CP850" / "ASCII",
     * intl (grapheme_strstr(), UTF-8 only), plain strstr() for pure ASCII input and finally a regex.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first occurrence
     *                              of the needle is returned (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      or if haystack or needle is empty.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl - INFO: "grapheme_strstr()" can't handle other encodings than UTF-8
        //

        if (
            self::$SUPPORT['intl'] === true
            &&
            $encoding === 'UTF-8'
        ) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // 4) ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5) vanilla php: lazily capture everything in front of the first needle
        //

        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $prefix = $found[1];

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
11506
11507
    /**
     * Find the first occurrence of a string within another, working on the raw string (no UTF-8 handling).
     *
     * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack is returned.
     *                              If set to true, all of haystack from the beginning
     *                              to the first occurrence of needle is returned.
     *                              If set to false, all of haystack from the first
     *                              occurrence of needle to the end is returned.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found or if one of the inputs is empty.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        $has_empty_input = $haystack === '' || $needle === '';
        if ($has_empty_input) {
            return false;
        }

        // If the "mbstring_func_overload" flag is set, the "mb_" functions are available,
        // so mb_strstr() is used with the 8-bit "CP850" charset; otherwise plain strstr().
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
11548
11549
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The case-folded string, or an empty string for empty input.</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid UTF-8 sequences before any case mapping is applied
            $str = self::clean($str);
        }

        // pre-process the special case-folding characters via the helper
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // without a language and with plain UTF-8 the native "mb_" functions are called directly,
        // otherwise the work is delegated to UTF8::strtolower() / UTF8::strtoupper()
        $use_native = $lang === null && $encoding === 'UTF-8';

        if ($lower) {
            return $use_native
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $use_native
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11605
11606
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid UTF-8 sequences before the case mapping
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language given: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // a language was requested, but it can only be honoured with intl
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of known transliterator ids
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11687
11688
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid UTF-8 sequences before the case mapping
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of known transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11769
11770
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>
     *                              The characters / sub-strings to replace. If $to is left empty, an array is
     *                              used as a "search => replacement" map and a string is simply removed from $str.
     *                              </p>
     * @param string|string[] $to   [optional] <p>
     *                              The replacements. Strings are split via UTF8::str_split() and paired with $from
     *                              position by position; the longer of the two lists is truncated.
     *                              </p>
     *
     * @throws \InvalidArgumentException if $from and $to cannot be combined into a replacement map
     *
     * @psalm-pure
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change if every item is replaced by itself
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // both lists must have the same size: cut the longer one down
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the result in its own variable, so that the exception message
            // below can still show the original search list
            $replace_pairs = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $replace_pairs;
        }

        if (\is_string($from)) {
            // $from can only still be a string here if $to is the empty string,
            // so this removes every occurrence of $from
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11837
11838
    /**
     * Return the width of a string.
     *
     * INFO: this is the display width, it is not the same as the result of UTF8::strlen()
     *       (NOTE(review): the previous doc called that the "byte-length" - confirm)
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The width; 0 for an empty string. Without mbstring, every character from the
     *             "wide" ranges listed below counts twice and every other character counts once.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // 2) vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove all "wide" characters and let preg_replace() count them ...
        $wide_count = 0;
        $narrow_only = (string) \preg_replace(
            '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u',
            '',
            $str,
            -1,
            $wide_count
        );

        // ... each removed character is two columns wide, the rest one column each
        $narrow_count = (int) self::strlen($narrow_only);

        return ($wide_count * 2) + $narrow_count;
    }
11897
11898
    /**
11899
     * Get part of a string.
11900
     *
11901
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
11902
     *
11903
     * @see http://php.net/manual/en/function.mb-substr.php
11904
     *
11905
     * @param string   $str        <p>The string being checked.</p>
11906
     * @param int      $offset     <p>The first position used in str.</p>
11907
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
11908
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11909
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11910
     *
11911
     * @psalm-pure
11912
     *
11913
     * @return false|string
11914
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
11915
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
11916
     *                      characters long, <b>FALSE</b> will be returned.
11917
     */
11918
    public static function substr(
11919
        string $str,
11920
        int $offset = 0,
11921
        int $length = null,
11922
        string $encoding = 'UTF-8',
11923
        bool $clean_utf8 = false
11924
    ) {
11925
        // empty string
11926 172
        if ($str === '' || $length === 0) {
11927 8
            return '';
11928
        }
11929
11930 168
        if ($clean_utf8) {
11931
            // iconv and mbstring are not tolerant to invalid encoding
11932
            // further, their behaviour is inconsistent with that of PHP's substr
11933 2
            $str = self::clean($str);
11934
        }
11935
11936
        // whole string
11937 168
        if (!$offset && $length === null) {
11938 7
            return $str;
11939
        }
11940
11941 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
11942 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
11943
        }
11944
11945
        //
11946
        // fallback via mbstring
11947
        //
11948
11949 163
        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
11950 161
            if ($length === null) {
11951 64
                return \mb_substr($str, $offset);
11952
            }
11953
11954 102
            return \mb_substr($str, $offset, $length);
11955
        }
11956
11957
        //
11958
        // fallback for binary || ascii only
11959
        //
11960
11961
        if (
11962 4
            $encoding === 'CP850'
11963
            ||
11964 4
            $encoding === 'ASCII'
11965
        ) {
11966
            if ($length === null) {
11967
                return \substr($str, $offset);
11968
            }
11969
11970
            return \substr($str, $offset, $length);
11971
        }
11972
11973
        // otherwise we need the string-length
11974 4
        $str_length = 0;
11975 4
        if ($offset || $length === null) {
11976 4
            $str_length = self::strlen($str, $encoding);
11977
        }
11978
11979
        // e.g.: invalid chars + mbstring not installed
11980 4
        if ($str_length === false) {
11981
            return false;
11982
        }
11983
11984
        // empty string
11985 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
11986
            return '';
11987
        }
11988
11989
        // impossible
11990 4
        if ($offset && $offset > $str_length) {
11991
            return '';
11992
        }
11993
11994 4
        $length = $length ?? (int) $str_length;
11995
11996
        if (
11997 4
            $encoding !== 'UTF-8'
11998
            &&
11999 4
            self::$SUPPORT['mbstring'] === false
12000
        ) {
12001
            /**
12002
             * @psalm-suppress ImpureFunctionCall - is is only a warning
12003
             */
12004 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
12005
        }
12006
12007
        //
12008
        // fallback via intl
12009
        //
12010
12011
        if (
12012 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
12013
            &&
12014 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
12015
            &&
12016 4
            self::$SUPPORT['intl'] === true
12017
        ) {
12018
            $return_tmp = \grapheme_substr($str, $offset, $length);
12019
            if ($return_tmp !== false) {
12020
                return $return_tmp;
12021
            }
12022
        }
12023
12024
        //
12025
        // fallback via iconv
12026
        //
12027
12028
        if (
12029 4
            $length >= 0 // "iconv_substr()" can't handle negative length
12030
            &&
12031 4
            self::$SUPPORT['iconv'] === true
12032
        ) {
12033
            $return_tmp = \iconv_substr($str, $offset, $length);
12034
            if ($return_tmp !== false) {
12035
                return $return_tmp;
12036
            }
12037
        }
12038
12039
        //
12040
        // fallback for ascii only
12041
        //
12042
12043 4
        if (ASCII::is_ascii($str)) {
12044
            return \substr($str, $offset, $length);
12045
        }
12046
12047
        //
12048
        // fallback via vanilla php
12049
        //
12050
12051
        // split to array, and remove invalid characters
12052 4
        $array = self::str_split($str);
12053
12054
        // extract relevant part, and join to make sting again
12055 4
        return \implode('', \array_slice($array, $offset, $length));
12056
    }
12057
12058
    /**
12059
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
12060
     *
12061
     * EXAMPLE: <code>
12062
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
12063
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
12064
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
12065
     * </code>
12066
     *
12067
     * @param string   $str1               <p>The main string being compared.</p>
12068
     * @param string   $str2               <p>The secondary string being compared.</p>
12069
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
12070
     *                                     counting from the end of the string.</p>
12071
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
12072
     *                                     of the length of the str compared to the length of main_str less the
12073
     *                                     offset.</p>
12074
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
12075
     *                                     insensitive.</p>
12076
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
12077
     *
12078
     * @psalm-pure
12079
     *
12080
     * @return int
12081
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
12082
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
12083
     *             <strong>0</strong> if they are equal
12084
     */
12085
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // Only cut the strings down when a comparison window was requested.
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // compare at most as many characters as the window of $str1 holds
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // The window length must be measured in the same encoding that
                // is used to cut both strings, otherwise $str2 is truncated at
                // the wrong character count for non UTF-8 input.
                $str2 = (string) self::substr(
                    $str2,
                    0,
                    (int) self::strlen($str1, $encoding),
                    $encoding
                );
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12119
12120
    /**
12121
     * Count the number of substring occurrences.
12122
     *
12123
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
12124
     *
12125
     * @see http://php.net/manual/en/function.substr-count.php
12126
     *
12127
     * @param string   $haystack   <p>The string to search in.</p>
12128
     * @param string   $needle     <p>The substring to search for.</p>
12129
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
12130
     * @param int|null $length     [optional] <p>
12131
     *                             The maximum length after the specified offset to search for the
12132
     *                             substring. It outputs a warning if the offset plus the length is
12133
     *                             greater than the haystack length.
12134
     *                             </p>
12135
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12136
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12137
     *
12138
     * @psalm-pure
12139
     *
12140
     * @return false|int
12141
     *                   <p>This function returns the number of occurrences as an integer, or false if $haystack or $needle is an empty string.</p>
12142
     */
12143
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // without a haystack or a needle there is nothing to count
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an explicitly empty search window cannot contain the needle
        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'CP850' && $encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid byte sequences are removed from both inputs before counting
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        // restrict the haystack to the requested window
        $window_requested = $offset || $length > 0;
        if ($window_requested) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        $mbstring_support = self::$SUPPORT['mbstring'];

        if ($mbstring_support === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        if ($mbstring_support === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // fallback: count the (quoted) needle via a unicode-aware regex
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12209
12210
    /**
12211
     * Count the number of substring occurrences.
12212
     *
12213
     * @param string   $haystack <p>
12214
     *                           The string being checked.
12215
     *                           </p>
12216
     * @param string   $needle   <p>
12217
     *                           The string being found.
12218
     *                           </p>
12219
     * @param int      $offset   [optional] <p>
12220
     *                           The offset where to start counting
12221
     *                           </p>
12222
     * @param int|null $length   [optional] <p>
12223
     *                           The maximum length after the specified offset to search for the
12224
     *                           substring. It outputs a warning if the offset plus the length is
12225
     *                           greater than the haystack length.
12226
     *                           </p>
12227
     *
12228
     * @psalm-pure
12229
     *
12230
     * @return false|int
12231
     *                   <p>The number of times the
12232
     *                   needle substring occurs in the
12233
     *                   haystack string.</p>
12234
     */
12235
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // without "mbstring.func_overload" the native functions already work on bytes
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // overload is active: cut the window manually before counting
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            $non_positive_end = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
            // output from "substr_count()" have changed in PHP 7.1
            if ($non_positive_end && !Bootup::is_php('7.1')) {
                return false;
            }

            /** @var false|string $sliced - needed for PhpStan (stubs error) */
            $sliced = \substr($haystack, $offset, $length);
            $haystack = $sliced === false ? '' : (string) $sliced;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
12291
12292
    /**
12293
     * Returns the number of occurrences of $substring in the given string.
12294
     * By default, the comparison is case-sensitive, but can be made insensitive
12295
     * by setting $case_sensitive to false.
12296
     *
12297
     * @param string $str            <p>The input string.</p>
12298
     * @param string $substring      <p>The substring to search for.</p>
12299
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
12300
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
12301
     *
12302
     * @psalm-pure
12303
     *
12304
     * @return int
12305
     */
12306
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                // fold both sides so that the comparison ignores the case
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        if (!$case_sensitive) {
            // upper-case both sides so that the comparison ignores the case
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
12339
12340
    /**
12341
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
12342
     *
12343
     * EXAMPLE: <code>
12344
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
12345
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
12346
     * </code>
12347
     *
12348
     * @param string $haystack <p>The string to search in.</p>
12349
     * @param string $needle   <p>The substring to search for.</p>
12350
     *
12351
     * @psalm-pure
12352
     *
12353
     * @return string
12354
     *                <p>Return the sub-string.</p>
12355
     */
12356
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // an empty needle never matches, a non-matching prefix leaves the input untouched
        if ($needle === '' || !self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many leading characters as the needle has
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
12372
12373
    /**
12374
     * Get part of a string process in bytes.
12375
     *
12376
     * @param string   $str    <p>The string being checked.</p>
12377
     * @param int      $offset <p>The first position used in str.</p>
12378
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
12379
     *
12380
     * @psalm-pure
12381
     *
12382
     * @return false|string
12383
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
12384
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
12385
     *                      characters long, <b>FALSE</b> will be returned.
12386
     */
12387
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // No length given: omit the argument instead of passing a hard-coded
        // 32-bit maximum, which would cut off very large strings on 64-bit builds.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
12406
12407
    /**
12408
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
12409
     *
12410
     * EXAMPLE: <code>
12411
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
12412
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
12413
     * </code>
12414
     *
12415
     * @param string $haystack <p>The string to search in.</p>
12416
     * @param string $needle   <p>The substring to search for.</p>
12417
     *
12418
     * @psalm-pure
12419
     *
12420
     * @return string
12421
     *                <p>Return the sub-string.</p>
12422
     */
12423
    public static function substr_iright(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // an empty needle never matches, a non-matching suffix leaves the input untouched
        if ($needle === '' || !self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything except the trailing characters covered by the needle
        $haystack_length = (int) self::strlen($haystack);
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $needle_length);
    }
12439
12440
    /**
12441
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
12442
     *
12443
     * EXAMPLE: <code>
12444
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
12445
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
12446
     * </code>
12447
     *
12448
     * @param string $haystack <p>The string to search in.</p>
12449
     * @param string $needle   <p>The substring to search for.</p>
12450
     *
12451
     * @psalm-pure
12452
     *
12453
     * @return string
12454
     *                <p>Return the sub-string.</p>
12455
     */
12456
    public static function substr_left(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // an empty needle never matches, a non-matching prefix leaves the input untouched
        if ($needle === '' || !self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many leading characters as the needle has
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
12472
12473
    /**
12474
     * Replace text within a portion of a string.
12475
     *
12476
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
12477
     *
12478
     * source: https://gist.github.com/stemar/8287074
12479
     *
12480
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
12481
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
12482
     * @param int|int[]       $offset      <p>
12483
     *                                     If start is positive, the replacing will begin at the start'th offset
12484
     *                                     into string.
12485
     *                                     <br><br>
12486
     *                                     If start is negative, the replacing will begin at the start'th character
12487
     *                                     from the end of string.
12488
     *                                     </p>
12489
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
12490
     *                                     portion of string which is to be replaced. If it is negative, it
12491
     *                                     represents the number of characters from the end of string at which to
12492
     *                                     stop replacing. If it is not given, then it will default to strlen(
12493
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
12494
     *                                     length is zero then this function will have the effect of inserting
12495
     *                                     replacement into string at the given start offset.</p>
12496
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
12497
     *
12498
     * @psalm-pure
12499
     *
12500
     * @return string|string[]
12501
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
12502
     */
12503
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        //
        // array input: normalize all arguments to arrays of the same size
        // and process every string on its own
        //

        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries are replaced by 0)
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (no length means "insert" for array input,
            // non-integer entries are replaced by the number of strings)
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // Recursive call per element: the closure forwards $encoding, which
            // a plain callable passed to "array_map()" would silently drop.
            return \array_map(
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        //
        // single string input
        //

        // only the first replacement is used for a single string
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // translate a negative offset and clamp the offset to the string
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative length and clamp the length to the string
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // the replaced part must not reach beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12628
12629
    /**
12630
     * Removes a suffix ($needle) from the end of the string ($haystack).
12631
     *
12632
     * EXAMPLE: <code>
12633
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
12634
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
12635
     * </code>
12636
     *
12637
     * @param string $haystack <p>The string to search in.</p>
12638
     * @param string $needle   <p>The substring to search for.</p>
12639
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
12640
     *
12641
     * @psalm-pure
12642
     *
12643
     * @return string
12644
     *                <p>Return the sub-string.</p>
12645
     */
12646
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        // byte-wise check: does the haystack end with the needle at all?
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
12678
12679
    /**
12680
     * Returns a case swapped version of the string.
12681
     *
12682
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
12683
     *
12684
     * @param string $str        <p>The input string.</p>
12685
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12686
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12687
     *
12688
     * @psalm-pure
12689
     *
12690
     * @return string
12691
     *                <p>Each character's case swapped.</p>
12692
     */
12693
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before the string is processed
            $str = self::clean($str);
        }

        // The case is swapped character by character. A byte-wise
        // "lower ^ upper ^ str" XOR only works while every case mapping keeps
        // its byte length; e.g. "ı" -> "I" or "ß" -> "SS" shift all following
        // bytes and the (truncated) result gets corrupted.

        if ($encoding === 'UTF-8') {
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars === false) {
                // the string could not be split (e.g. invalid UTF-8):
                // keep the previous byte-wise behaviour
                return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
            }

            $swapped = '';
            foreach ($chars as $char) {
                $upper = \mb_strtoupper($char);
                // already upper-case (or caseless) -> lower it, otherwise raise it
                $swapped .= $char === $upper ? \mb_strtolower($char) : $upper;
            }

            return $swapped;
        }

        $str_length = self::strlen($str, $encoding);
        if ($str_length === false) {
            // the length is unknown: keep the previous byte-wise behaviour
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        $swapped = '';
        for ($i = 0; $i < $str_length; ++$i) {
            $char = (string) self::substr($str, $i, 1, $encoding);
            $upper = self::strtoupper($char, $encoding);
            $swapped .= $char === $upper ? self::strtolower($char, $encoding) : $upper;
        }

        return $swapped;
    }
12711
12712
    /**
12713
     * Checks whether symfony-polyfills are used.
12714
     *
12715
     * @psalm-pure
12716
     *
12717
     * @return bool
12718
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
12719
     *
12720
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
12721
     */
12722
    public static function symfony_polyfill_used(): bool
    {
        // a polyfill is in use if the functions exist although the extension is missing
        $mbstring_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_polyfilled || $iconv_polyfilled;
    }
12739
12740
    /**
12741
     * @param string $str
12742
     * @param int    $tab_length
12743
     *
12744
     * @psalm-pure
12745
     *
12746
     * @return string
12747
     */
12748
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // every tab is replaced by $tab_length space characters
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12760
12761
    /**
     * Title-cases the string: the first character of each word becomes uppercase,
     * all other characters become lowercase.
     *
     * Without a language and without the "keep length" option the work is done by
     * "mb_convert_case()"; otherwise the call is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // strip invalid byte sequences before any case mapping happens
            $str = self::clean($str);
        }

        // language-specific rules or length preservation are handled by str_titleize()
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
12815
12816
    /**
     * Deprecated camelCase alias that forwards all arguments to "UTF8::to_ascii()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr <p>Forwarded as the second argument of "UTF8::to_ascii()".</p>
     * @param bool   $strict    <p>Forwarded as the third argument of "UTF8::to_ascii()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12837
12838
    /**
     * Deprecated camelCase alias that forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12854
12855
    /**
     * Deprecated camelCase alias that forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12871
12872
    /**
     * Deprecated camelCase alias that forwards to "UTF8::to_utf8()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
12888
12889
    /**
     * Convert a string into ASCII by delegating to "ASCII::to_transliterate()".
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12910
12911
    /**
     * Interprets a scalar as a boolean.
     *
     * The value is cast to string, trimmed and lower-cased, then evaluated as follows:
     * <ul>
     * <li>empty (or whitespace-only) => false</li>
     * <li>"true", "1", "on", "yes" => true</li>
     * <li>"false", "0", "off", "no" => false</li>
     * <li>any other numeric string => true only if the number is greater than zero</li>
     * <li>everything else => true</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_boolean(' Off '); // false</code>
     *
     * @param bool|int|string $str
     *
     * @psalm-param bool|int|numeric-string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // Normalize once, so that surrounding whitespace or letter case can never
        // push a keyword like " false " into the generic "non-empty => true" fallback.
        $normalized = \strtolower(\trim((string) $str));

        if ($normalized === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        if (isset($map[$normalized])) {
            return $map[$normalized];
        }

        // checked on the trimmed value, so the result does not depend on how the
        // running PHP version treats whitespace inside numeric strings
        if (\is_numeric($normalized)) {
            return (float) $normalized > 0;
        }

        // any other non-empty text counts as true
        return true;
    }
12956
12957
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * All arguments are handed unchanged to "ASCII::to_filename()".
     *
     * @param string $str               <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe
     *                                  characters are simply replaced with hyphen.</p>
     * @param string $fallback_char     <p>Forwarded as the third argument of "ASCII::to_filename()".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12980
12981
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively); an empty string is
     * returned as-is; everything else is passed to "UTF8::utf8_decode()".
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            // keys are preserved, only the values are replaced
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
13009
13010
    /**
     * Deprecated alias that forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
13026
13027
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * A string is passed to "UTF8::to_utf8_string()"; for an array, every element is
     * passed to "UTF8::to_utf8_string()" and the keys are kept.
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @psalm-param TToUtf8 $str
     * @psalm-return TToUtf8
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @psalm-var TToUtf8 $str */
            $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $str;
        }

        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        return $str;
    }
13068
13069
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * The string is processed in three steps:
     * <ol>
     * <li>byte scan: byte runs that look like a 2-, 3- or 4-byte UTF-8 sequence are copied, every other byte
     * above 0x7F goes through "UTF8::to_utf8_convert_helper()"</li>
     * <li>"\uXXXX" escape sequences (including surrogate pairs) are replaced by the character they name</li>
     * <li>optionally, HTML entities are decoded via "UTF8::html_entity_decode()"</li>
     * </ol>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the escape-sequence regex fails.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';
        $pos = 0;

        while ($pos < $length) {
            $lead = $str[$pos];

            // 7-bit byte: it doesn't need conversion
            if ($lead < "\x80") {
                $result .= $lead;
                ++$pos;

                continue;
            }

            // how many continuation bytes does the lead byte announce?
            if ($lead >= "\xC0" && $lead <= "\xDF") {
                $trailing = 1; // looks like 2 bytes UTF8
            } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
                $trailing = 2; // looks like 3 bytes UTF8
            } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
                $trailing = 3; // looks like 4 bytes UTF8
            } else {
                $trailing = 0; // 0x80-0xBF or 0xF8-0xFF: never starts a sequence
            }

            // the sequence only counts as UTF8 if all announced bytes exist
            // and every one of them is a continuation byte (0x80-0xBF)
            $sequence = $lead;
            $is_utf8_already = $trailing > 0 && $pos + $trailing < $length;
            for ($offset = 1; $is_utf8_already && $offset <= $trailing; ++$offset) {
                $next = $str[$pos + $offset];
                $is_utf8_already = $next >= "\x80" && $next <= "\xBF";
                $sequence .= $next;
            }

            if ($is_utf8_already) {
                // yeah, almost sure it's UTF8 already
                $result .= $sequence;
                $pos += $trailing + 1;
            } else {
                // not valid UTF8 - convert only the lead byte and re-scan from the next byte
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);

                    $code_point = ($high_surrogate << 10)
                                  + $low_surrogate
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // NOTE(review): self::chr() is not visible here - if it returns an encoded character
                    // instead of a raw byte, this branch would double-encode; confirm.
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6))
                           . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // preg_replace_callback() signals an error with null
        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        return $decode_html_entity_to_utf8 ? self::html_entity_decode($result) : $result;
    }
13202
13203
    /**
     * Returns the given string as an integer, or null if the string isn't numeric.
     *
     * The check is done with "is_numeric()", the conversion with an (int) cast,
     * so e.g. '12.7' becomes 12.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
13221
13222
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and does not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        switch (\gettype($input)) {
            case 'string':
            case 'integer':
            case 'double': // gettype() reports floats as "double"
                return (string) $input;

            case 'object':
                /** @var object $input - hack for psalm / phpstan */
                return \method_exists($input, '__toString') ? (string) $input : null;

            default:
                // null, bool, array, resource, ...
                return null;
        }
    }
13267
13268
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * With mbstring support the replacement is done by "mb_ereg_replace()",
     * otherwise by "UTF8::regex_replace()".
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // the non-mbstring path additionally escapes the "/" delimiter
            /** @noinspection PregQuoteUsageInspection */
            $chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');

            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
13314
13315
    /**
     * Makes string's first char uppercase.
     *
     * Without a language and without the "keep length" option the first character is
     * upper-cased by "mb_strtoupper()", otherwise by "UTF8::strtoupper()".
     * The rest of the string is left untouched.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // invalid byte sequences would shift the character positions used below
            $str = self::clean($str);
        }

        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            if ($use_mb_functions) {
                return \mb_strtoupper($first_char) . $remainder;
            }

            return self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($use_mb_functions) {
            $first_char = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($first_char, $encoding) . $remainder;
        }

        $first_char = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $remainder;
    }
13391
13392
    /**
     * Deprecated alias that forwards to "UTF8::ucfirst()".
     *
     * Despite its name it only upper-cases the first character of the string,
     * because that is what "UTF8::ucfirst()" is asked to do.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   <p>Forwarded as the second argument of "UTF8::ucfirst()".</p>
     * @param bool   $clean_utf8 <p>Forwarded as the third argument of "UTF8::ucfirst()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
13413
13414
    /**
     * Uppercase for all words in the string.
     *
     * Only the first character of every word is changed, the other characters keep
     * their case (unlike "mb_convert_case()" with MB_CASE_TITLE, which lowercases them).
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: the string "0" is falsy in PHP, but it is valid input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the native function is only an option if neither extra word chars nor exceptions were given
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty entries only - a word like "0" must stay in the result
            if ((string) $word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                // excluded words are copied unchanged
                $words_str .= $word;
            } else {
                $words_str .= self::ucfirst($word, $encoding);
            }
        }

        return $words_str;
    }
13482
13483
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * Strings without any of "&", "%", "+" or "\u" are only passed through
     * "UTF8::fix_simple_utf8()". Everything else is decoded by the chain
     * to_utf8() -> html_entity_decode() -> urldecode() -> fix_simple_utf8(),
     * which is repeated until the string stops changing if $multi_decode is true.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $may_be_encoded = \strpos($str, '&') !== false
                          || \strpos($str, '%') !== false
                          || \strpos($str, '+') !== false
                          || \strpos($str, '\u') !== false;

        if (!$may_be_encoded) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one pass is always done; further passes only in multi-decode mode
        // and only as long as a pass still changes the string
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return $str;
    }
13558
13559
    /**
13560
     * Return a array with "urlencoded"-win1252 -> UTF-8
13561
     *
13562
     * @psalm-pure
13563
     *
13564
     * @return string[]
13565
     *
13566
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
13567
     */
13568
    public static function urldecode_fix_win1252_chars(): array
13569
    {
13570
        return [
13571 2
            '%20' => ' ',
13572
            '%21' => '!',
13573
            '%22' => '"',
13574
            '%23' => '#',
13575
            '%24' => '$',
13576
            '%25' => '%',
13577
            '%26' => '&',
13578
            '%27' => "'",
13579
            '%28' => '(',
13580
            '%29' => ')',
13581
            '%2A' => '*',
13582
            '%2B' => '+',
13583
            '%2C' => ',',
13584
            '%2D' => '-',
13585
            '%2E' => '.',
13586
            '%2F' => '/',
13587
            '%30' => '0',
13588
            '%31' => '1',
13589
            '%32' => '2',
13590
            '%33' => '3',
13591
            '%34' => '4',
13592
            '%35' => '5',
13593
            '%36' => '6',
13594
            '%37' => '7',
13595
            '%38' => '8',
13596
            '%39' => '9',
13597
            '%3A' => ':',
13598
            '%3B' => ';',
13599
            '%3C' => '<',
13600
            '%3D' => '=',
13601
            '%3E' => '>',
13602
            '%3F' => '?',
13603
            '%40' => '@',
13604
            '%41' => 'A',
13605
            '%42' => 'B',
13606
            '%43' => 'C',
13607
            '%44' => 'D',
13608
            '%45' => 'E',
13609
            '%46' => 'F',
13610
            '%47' => 'G',
13611
            '%48' => 'H',
13612
            '%49' => 'I',
13613
            '%4A' => 'J',
13614
            '%4B' => 'K',
13615
            '%4C' => 'L',
13616
            '%4D' => 'M',
13617
            '%4E' => 'N',
13618
            '%4F' => 'O',
13619
            '%50' => 'P',
13620
            '%51' => 'Q',
13621
            '%52' => 'R',
13622
            '%53' => 'S',
13623
            '%54' => 'T',
13624
            '%55' => 'U',
13625
            '%56' => 'V',
13626
            '%57' => 'W',
13627
            '%58' => 'X',
13628
            '%59' => 'Y',
13629
            '%5A' => 'Z',
13630
            '%5B' => '[',
13631
            '%5C' => '\\',
13632
            '%5D' => ']',
13633
            '%5E' => '^',
13634
            '%5F' => '_',
13635
            '%60' => '`',
13636
            '%61' => 'a',
13637
            '%62' => 'b',
13638
            '%63' => 'c',
13639
            '%64' => 'd',
13640
            '%65' => 'e',
13641
            '%66' => 'f',
13642
            '%67' => 'g',
13643
            '%68' => 'h',
13644
            '%69' => 'i',
13645
            '%6A' => 'j',
13646
            '%6B' => 'k',
13647
            '%6C' => 'l',
13648
            '%6D' => 'm',
13649
            '%6E' => 'n',
13650
            '%6F' => 'o',
13651
            '%70' => 'p',
13652
            '%71' => 'q',
13653
            '%72' => 'r',
13654
            '%73' => 's',
13655
            '%74' => 't',
13656
            '%75' => 'u',
13657
            '%76' => 'v',
13658
            '%77' => 'w',
13659
            '%78' => 'x',
13660
            '%79' => 'y',
13661
            '%7A' => 'z',
13662
            '%7B' => '{',
13663
            '%7C' => '|',
13664
            '%7D' => '}',
13665
            '%7E' => '~',
13666
            '%7F' => '',
13667
            '%80' => '`',
13668
            '%81' => '',
13669
            '%82' => '‚',
13670
            '%83' => 'ƒ',
13671
            '%84' => '„',
13672
            '%85' => '…',
13673
            '%86' => '†',
13674
            '%87' => '‡',
13675
            '%88' => 'ˆ',
13676
            '%89' => '‰',
13677
            '%8A' => 'Š',
13678
            '%8B' => '‹',
13679
            '%8C' => 'Œ',
13680
            '%8D' => '',
13681
            '%8E' => 'Ž',
13682
            '%8F' => '',
13683
            '%90' => '',
13684
            '%91' => '‘',
13685
            '%92' => '’',
13686
            '%93' => '“',
13687
            '%94' => '”',
13688
            '%95' => '•',
13689
            '%96' => '–',
13690
            '%97' => '—',
13691
            '%98' => '˜',
13692
            '%99' => '™',
13693
            '%9A' => 'š',
13694
            '%9B' => '›',
13695
            '%9C' => 'œ',
13696
            '%9D' => '',
13697
            '%9E' => 'ž',
13698
            '%9F' => 'Ÿ',
13699
            '%A0' => '',
13700
            '%A1' => '¡',
13701
            '%A2' => '¢',
13702
            '%A3' => '£',
13703
            '%A4' => '¤',
13704
            '%A5' => '¥',
13705
            '%A6' => '¦',
13706
            '%A7' => '§',
13707
            '%A8' => '¨',
13708
            '%A9' => '©',
13709
            '%AA' => 'ª',
13710
            '%AB' => '«',
13711
            '%AC' => '¬',
13712
            '%AD' => '',
13713
            '%AE' => '®',
13714
            '%AF' => '¯',
13715
            '%B0' => '°',
13716
            '%B1' => '±',
13717
            '%B2' => '²',
13718
            '%B3' => '³',
13719
            '%B4' => '´',
13720
            '%B5' => 'µ',
13721
            '%B6' => '¶',
13722
            '%B7' => '·',
13723
            '%B8' => '¸',
13724
            '%B9' => '¹',
13725
            '%BA' => 'º',
13726
            '%BB' => '»',
13727
            '%BC' => '¼',
13728
            '%BD' => '½',
13729
            '%BE' => '¾',
13730
            '%BF' => '¿',
13731
            '%C0' => 'À',
13732
            '%C1' => 'Á',
13733
            '%C2' => 'Â',
13734
            '%C3' => 'Ã',
13735
            '%C4' => 'Ä',
13736
            '%C5' => 'Å',
13737
            '%C6' => 'Æ',
13738
            '%C7' => 'Ç',
13739
            '%C8' => 'È',
13740
            '%C9' => 'É',
13741
            '%CA' => 'Ê',
13742
            '%CB' => 'Ë',
13743
            '%CC' => 'Ì',
13744
            '%CD' => 'Í',
13745
            '%CE' => 'Î',
13746
            '%CF' => 'Ï',
13747
            '%D0' => 'Ð',
13748
            '%D1' => 'Ñ',
13749
            '%D2' => 'Ò',
13750
            '%D3' => 'Ó',
13751
            '%D4' => 'Ô',
13752
            '%D5' => 'Õ',
13753
            '%D6' => 'Ö',
13754
            '%D7' => '×',
13755
            '%D8' => 'Ø',
13756
            '%D9' => 'Ù',
13757
            '%DA' => 'Ú',
13758
            '%DB' => 'Û',
13759
            '%DC' => 'Ü',
13760
            '%DD' => 'Ý',
13761
            '%DE' => 'Þ',
13762
            '%DF' => 'ß',
13763
            '%E0' => 'à',
13764
            '%E1' => 'á',
13765
            '%E2' => 'â',
13766
            '%E3' => 'ã',
13767
            '%E4' => 'ä',
13768
            '%E5' => 'å',
13769
            '%E6' => 'æ',
13770
            '%E7' => 'ç',
13771
            '%E8' => 'è',
13772
            '%E9' => 'é',
13773
            '%EA' => 'ê',
13774
            '%EB' => 'ë',
13775
            '%EC' => 'ì',
13776
            '%ED' => 'í',
13777
            '%EE' => 'î',
13778
            '%EF' => 'ï',
13779
            '%F0' => 'ð',
13780
            '%F1' => 'ñ',
13781
            '%F2' => 'ò',
13782
            '%F3' => 'ó',
13783
            '%F4' => 'ô',
13784
            '%F5' => 'õ',
13785
            '%F6' => 'ö',
13786
            '%F7' => '÷',
13787
            '%F8' => 'ø',
13788
            '%F9' => 'ù',
13789
            '%FA' => 'ú',
13790
            '%FB' => 'û',
13791
            '%FC' => 'ü',
13792
            '%FD' => 'ý',
13793
            '%FE' => 'þ',
13794
            '%FF' => 'ÿ',
13795
        ];
13796
    }
13797
13798
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Every code point that does not fit into a single byte (>= 256) is replaced by "?".
     * A 2-byte lead byte that is cut off at the very end of the input is replaced by "?" as well.
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true and the decoded result is not shorter than the input
     *                                (both measured with self::strlen()), the untouched input is returned.
     *                                </p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // $i reads the input bytes, $j writes the decoded bytes in place ($j never overtakes $i)
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte selects the sequence length
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence: there is no continuation byte to read
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or longer) sequences never fit into one byte
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13874
13875
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion fails.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // "\utf8_encode()" is deprecated as of PHP 8.2, this is the equivalent conversion
        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
13903
13904
    /**
     * Legacy alias that forwards the input to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
13919
13920
    /**
     * Returns the table of utf8 whitespace characters kept in self::$WHITESPACE_TABLE.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
13935
13936
    /**
     * Limit the number of words in a string.
     *
     * Words are runs of non-whitespace characters. If the string contains more than $limit words,
     * it is cut after the last allowed word, trailing whitespace is removed and $str_add_on is appended.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str is empty or $limit is lower than 1.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($str === '' || $limit < 1) {
            return '';
        }

        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        // no word was matched, or the anchored match already covers the whole string
        if (
            !isset($matches[0])
            ||
            $matches[0] === $str
        ) {
            return $str;
        }

        // "\s" is Unicode-aware with the "u" modifier, so the trailing whitespace is removed with
        // the same definition; "rtrim()" alone would keep e.g. a trailing no-break space
        $trimmed = \preg_replace('/\\s+\\z/u', '', $matches[0]);
        if ($trimmed === null) {
            $trimmed = \rtrim($matches[0]);
        }

        return $trimmed . $str_add_on;
    }
13970
13971
    /**
     * Wraps a string to a given number of characters
     *
     * The string is translated into a one-byte-per-character mask ("?" = character, " " = space,
     * "#" = existing break), the mask is wrapped with the native "wordwrap()" and the resulting
     * break positions are then applied to the real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            // every segment but the first one is preceded by an already existing break
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= ($char === ' ') ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_index = -1;
        $break_pos = -1;

        while (true) {
            $break_pos = \mb_strpos($mask, '#', $break_pos + 1);
            if ($break_pos === false) {
                break;
            }

            // copy all characters in front of the current break position
            ++$mask_index;
            while ($mask_index < $break_pos) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }

                ++$mask_index;
            }

            // the character at the break position is dropped if it is a space or an existing break
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index]);
                ++$char_index;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed above belongs to the last line
        return $wrapped . \implode('', $chars);
    }
14065
14066
    /**
     * Splits the string by "$delimiter" (newlines by default), wraps every part on its own
     * via "UTF8::wordwrap()" and joins the parts again.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     The parts are joined with this delimiter again ("\n" if it is null).
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        // a failed split yields "false", in which case there is nothing to wrap
        foreach (($lines === false ? [] : $lines) as $line) {
            $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }
14119
14120
    /**
     * Returns the Unicode White Space characters kept in self::$WHITESPACE.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
14132
14133
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * If PCRE with UTF-8 support is available the check is delegated to "preg_match()",
     * otherwise the bytes are validated one by one.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        if ($strict) {
            $is_binary = self::is_binary($str, true);

            // binary looking input that is detected as UTF-16 or UTF-32 is rejected
            if (
                $is_binary
                &&
                (
                    self::is_utf16($str, false) !== false
                    ||
                    self::is_utf32($str, false) !== false
                )
            ) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // Lead octet forms: [mask, expected bits, payload mask, number of continuation octets].
        // The 5 and 6 octet forms are illegal, they are parsed anyway and rejected
        // once the sequence is complete instead of trying to resynchronize.
        $lead_octet_forms = [
            [0xE0, 0xC0, 0x1F, 1],
            [0xF0, 0xE0, 0x0F, 2],
            [0xF8, 0xF0, 0x07, 3],
            [0xFC, 0xF8, 0x03, 4],
            [0xFE, 0xFC, 0x01, 5],
        ];

        // smallest code point per sequence length (from Unicode 3.1, non-shortest form is illegal)
        $shortest_form_minimum = [
            2 => 0x0080,
            3 => 0x0800,
            4 => 0x10000,
        ];

        $pending = 0; // number of continuation octets that are still expected
        $code_point = 0; // code point assembled so far
        $sequence_length = 1; // total number of octets in the current sequence

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the lead octet of a multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    $sequence_length = 1;

                    continue;
                }

                $is_lead_octet = false;
                foreach ($lead_octet_forms as $form) {
                    if (($form[0] & $octet) === $form[1]) {
                        $pending = $form[3];
                        $sequence_length = $form[3] + 1;
                        $code_point = ($octet & $form[2]) << ($form[3] * 6);
                        $is_lead_octet = true;

                        break;
                    }
                }

                if (!$is_lead_octet) {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            if ((0xC0 & $octet) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            // Legal continuation: add its six payload bits at the right position.
            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            --$pending;
            if ($pending !== 0) {
                continue;
            }

            // End of the multi-octet sequence: check for illegal sequences and code points.
            if (
                $sequence_length > 4
                ||
                $code_point < $shortest_form_minimum[$sequence_length]
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                $code_point > 0x10FFFF
            ) {
                return false;
            }

            // reset for the next character
            $code_point = 0;
            $sequence_length = 1;
        }

        // a sequence that is still open at the end of the input is invalid
        return $pending === 0;
    }
14288
14289
    /**
     * Replaces the characters of the case folding tables via "str_replace()".
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>
     *                                   If true, the "upper" entries are replaced by the "lower" entries,
     *                                   otherwise (default) the "lower" entries are replaced by the "upper" entries.
     *                                   </p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        $str = \str_replace(
            $use_lowercase ? $common_upper : $common_lower,
            $use_lowercase ? $common_lower : $common_upper,
            $str
        );

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * The full case folding table is loaded once and then kept for later calls.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is the search side when lowercasing, index 1 when uppercasing
        $search_index = $use_lowercase ? 0 : 1;

        return \str_replace(
            $FULL_CASE_FOLD[$search_index],
            $FULL_CASE_FOLD[1 - $search_index],
            $str
        );
    }
14342
14343
    /**
     * Includes "/data/<$file>.php" (relative to this file) and returns what the include returns.
     *
     * @param string $file <p>The file name without path and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $data_file = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $data_file;
    }
14361
14362
    /**
     * Fills the emoji caches on the first call.
     *
     * The emoji table is sorted by key length (longest key first), then its keys, its values and
     * one reversible placeholder per key are stored in the static cache properties.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>True if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                // longer keys come first
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            // the placeholder contains the checksum of the key followed by the reversed checksum
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14397
14398
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              False if the constant "MB_OVERLOAD_STRING" is not defined, otherwise whether
     *              that bit is set in the INI value "mbstring.func_overload".
     *              </p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
14419
14420
    /**
     * Filters a list of strings and returns the remaining values as a re-indexed list.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>If true, values that are empty after "trim()" are dropped.</p>
     * @param int|null $remove_short_values <p>
     *                                      If not null, values whose "mb_strlen()" is lower than or equal to
     *                                      this number are dropped.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // the values are only read, so they are iterated by value (no dangling reference is left behind)
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
14461
14462
    /**
     * rxClass
     *
     * Builds a regex fragment from the characters of $s: a bracketed character class and,
     * for elements that do not fit into it, a non-capturing alternation around the class
     * and those elements. The result is cached per combination of $s and $class.
     *
     * @param string $s     <p>The characters to build the fragment from.</p>
     * @param string $class <p>Initial content of the character class, the characters of $s are added to it.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 holds the content of the character class, further entries are alternatives
        /** @var string[] $class_array */
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // the hyphen is moved to the front of the class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // elements of at most two bytes are escaped for use in a "/"-delimited pattern
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // NOTE: this is a truthiness check, so a class of exactly "0" is left without brackets
        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14520
14521
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names on $delimiter and upper-cases the first character of each part,
     * except for parts that:
     * - are exactly one of the known lowercase name particles (e.g. "von", "de"),
     * - start with one of the known prefixes (e.g. "mc", "d'"), or
     * - start with one of the name particles while the delimiter is "-".
     *
     * The parts are then joined with the same delimiter again.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between name parts; an empty delimiter yields an empty string.</p>
     * @param string $encoding  <p>The encoding that is passed on to the prefix lookup.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() rejects an empty separator (warning + false before PHP 8, ValueError
        // since PHP 8), so keep the historic "empty result" outcome without calling it.
        if ($delimiter === '') {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched: always the prefixes, and for
        // hyphen-separated names additionally every name particle.
        $protected_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $protected_beginnings = \array_merge($special_cases['names'], $protected_beginnings);
        }

        $name_helper_array = \explode($delimiter, $names);

        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $is_protected = false;
            foreach ($protected_beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $is_protected = true;

                    break;
                }
            }

            if ($is_protected) {
                continue;
            }

            // NOTE(review): $encoding is not forwarded to ucfirst() here - confirm this is intended.
            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
14621
14622
    /**
     * Folds a string for collation matching by removing combining marks.
     *
     * The input is decomposed with Unicode normalization form D, then every run of
     * non-spacing marks (\p{Mn}) is removed, so that e.g. "é" is reduced to "e".
     * Letter case is left untouched.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The folded string, an empty string if the input could not be normalized,
     *                     or null if the regular expression replacement failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() signals failure with a non-string; handle that explicitly instead
        // of handing it to preg_replace(). The outcome ('') matches what the implicit
        // conversion of false produced before.
        if (!\is_string($decomposed)) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
14640
14641
    /**
     * Converts one input value into its UTF-8 representation.
     *
     * The value is first looked up in the "ord" table. If the resulting code has an
     * entry in the Windows-1252 table, that entry is returned. Otherwise a two-byte
     * sequence is built: a lead byte from (code / 64) OR 0xC0, followed by a
     * continuation byte from the low six bits of the input OR 0x80.
     *
     * @param int|string $input <p>The value to convert; presumably a single byte - verify against the callers.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // lazily load the lookup tables on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Cast explicitly: "/" yields a float, and using a fractional float as an array
        // offset relies on an implicit truncation that is deprecated since PHP 8.1.
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";

        // The bitwise operators are intentional here; the operands are strings.
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
14680
14681
    /**
     * Rewrites "%uXXXX" escapes into hexadecimal HTML character references.
     *
     * Every "%u" followed by three or four hexadecimal digits is replaced with
     * "&#x...;" carrying the same digits. A string without such an escape is
     * returned unchanged.
     *
     * @param string $str <p>The string that may contain "%uXXXX" escapes.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $percent_u_regex = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to rewrite (or the match itself failed): hand the input back as-is
        if (!\preg_match($percent_u_regex, $str)) {
            return $str;
        }

        return (string) \preg_replace($percent_u_regex, '&#x\\1;', $str);
    }
14699
}
14700