Passed
Push — master ( f14ed6...d44683 )
by Lars
04:14
created

UTF8   F

Complexity

Total Complexity 1775

Size/Duplication

Total Lines 14784
Duplicated Lines 0 %

Test Coverage

Coverage 80.38%

Importance

Changes 109
Bugs 53 Features 6
Metric Value
eloc 4518
dl 0
loc 14784
ccs 3054
cts 3799
cp 0.8038
rs 0.8
c 109
b 53
f 6
wmc 1775

310 Methods

Rating   Name   Duplication   Size   Complexity  
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 23 5
A __construct() 0 2 1
B between() 0 48 8
A char_at() 0 7 2
A chars() 0 4 1
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
B chr_to_decimal() 0 38 8
A file_has_bom() 0 8 2
A filter_input() 0 16 3
A get_unique_string() 0 22 3
A encode_mimeheader() 0 26 5
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
A has_uppercase() 0 8 2
A isBinary() 0 3 1
A emoji_decode() 0 18 2
D chr() 0 109 19
A html_escape() 0 6 1
B get_file_type() 0 65 7
A chr_to_int() 0 3 1
A isHtml() 0 3 1
C filter() 0 57 12
A isBase64() 0 3 1
A decode_mimeheader() 0 8 3
A html_decode() 0 6 1
A isUtf32() 0 3 1
A chunk_split() 0 3 1
A emoji_encode() 0 18 2
A is_alpha() 0 8 2
B get_random_string() 0 56 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A css_identifier() 0 56 6
A isUtf8() 0 3 1
A css_stripe_media_queries() 0 6 1
A clean() 0 48 6
D getCharDirection() 0 105 118
A htmlspecialchars() 0 15 3
A filter_var_array() 0 15 2
A decimal_to_chr() 0 3 1
A has_whitespace() 0 8 2
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A intlChar_loaded() 0 3 1
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
F extract_text() 0 175 34
A isBom() 0 3 1
A int_to_chr() 0 3 1
A hasBom() 0 3 1
A iconv_loaded() 0 3 1
A isAscii() 0 3 1
A filter_var() 0 15 2
A isUtf16() 0 3 1
F encode() 0 147 37
A is_alphanumeric() 0 8 2
A fix_simple_utf8() 0 32 4
A checkForSupport() 0 48 4
A int_to_hex() 0 7 2
A has_lowercase() 0 8 2
A hex_to_int() 0 14 3
A htmlentities() 0 28 3
A hex_to_chr() 0 4 1
A isJson() 0 3 1
A filter_input_array() 0 15 3
A getSupportInfo() 0 13 3
A chr_to_hex() 0 11 3
A is_punctuation() 0 3 1
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 59 13
B file_get_contents() 0 56 11
A emoji_from_country_code() 0 17 3
A chr_size_list() 0 17 3
B html_encode() 0 54 11
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A is_utf8() 0 13 4
A lcword() 0 13 1
C is_utf16() 0 71 16
A is_html() 0 14 2
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A is_ascii() 0 3 1
A is_blank() 0 8 2
A lowerCaseFirst() 0 13 1
B is_binary() 0 38 9
A lcfirst() 0 44 5
B is_url() 0 44 7
A is_binary_file() 0 16 4
A json_loaded() 0 3 1
A is_lowercase() 0 8 2
A lcwords() 0 34 6
A is_empty() 0 3 1
C is_utf32() 0 71 16
A json_decode() 0 14 2
A is_printable() 0 3 1
B is_json() 0 27 8
A json_encode() 0 10 2
A is_base64() 0 17 5
A max() 0 14 3
A parse_str() 0 18 4
A max_chr_width() 0 8 2
A ltrim() 0 27 5
A mbstring_loaded() 0 3 1
D normalize_encoding() 0 147 16
A normalize_whitespace() 0 11 1
A normalize_line_ending() 0 3 1
D range() 0 72 23
A normalize_msword() 0 3 1
A pcre_utf8_support() 0 4 1
A normalizeEncoding() 0 3 1
C ord() 0 77 16
A min() 0 14 3
A str_substr_after_first_separator() 0 28 6
A str_begins() 0 3 1
B str_camelize() 0 74 10
A str_contains() 0 15 3
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
C utf8_decode() 0 61 13
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 72 15
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A str_limit() 0 26 6
A toUTF8() 0 3 1
B str_obfuscate() 0 47 8
A string() 0 16 4
B rxClass() 0 45 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 171 7
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strchr() 0 13 1
A strichr() 0 13 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 27 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 60 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
A str_replace_first() 0 20 2
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A trim() 0 27 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 42 11
B str_contains_all() 0 24 9
A str_isubstr_after_last_separator() 0 26 5
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
A rawurldecode() 0 35 4
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 16 3
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 138 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
A remove_right() 0 25 4
F strrpos() 0 136 31
A remove_html_breaks() 0 3 1
A showSupport() 0 17 3
A remove_invisible_characters() 0 11 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 15 3
A to_utf8() 0 14 3
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A str_upper_first() 0 13 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
B to_string() 0 33 8
A strtonatfold() 0 13 2
C strcspn() 0 49 12
A fixStrCaseHelper() 0 41 5
B str_split_pattern() 0 49 11
D strstr() 0 107 21
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 138 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 60 10
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 7 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
     * Known byte order marks mapped to their length in bytes.
     *
     * Each BOM is listed twice: as the raw byte sequence, and as the text that
     * results when those bytes were misread as "WINDOWS-1252" (one such
     * character may occupy more than one byte, hence the larger lengths).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * NOTE(review): the UTF-32 "WINDOWS-1252" keys contain two plain spaces
     * where the raw BOM has two NUL bytes - confirm this is intended.
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // Written with byte escapes ("ï»¿") so the key cannot be silently
        // stripped or re-encoded by editors and tools that swallow BOMs.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @phpstan-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @phpstan-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @phpstan-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @phpstan-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @phpstan-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @phpstan-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @phpstan-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @phpstan-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @phpstan-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @phpstan-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @phpstan-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @phpstan-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
     * Creates an instance; there is no state to initialise.
     */
    public function __construct()
    {
        // intentionally left empty
    }
252
253
    /**
     * Picks a single character out of a string, like $str[1] but multi-byte aware.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>Zero-based position of the wanted character.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $pos, or an empty string for empty input
     *                or a negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing can be picked from an empty string or at a negative position.
        $is_usable_input = $str !== '' && $pos >= 0;
        if (!$is_usable_input) {
            return '';
        }

        // UTF-8 goes straight to mbstring; every other charset uses the class helper.
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
279
280
    /**
     * Makes sure the string starts with a BOM by prepending the UTF-8 BOM when none is present.
     *
     * INFO: A string that already carries a BOM is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input string, guaranteed to begin with a BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str)
            ? $str
            : self::bom() . $str;
    }
302
303
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * The values are copied over untouched; only the keys are converted.
     * If two keys become equal after conversion, the later entry wins.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other value
     *                                       is treated as CASE_LOWER</p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<array-key, mixed>
     *                                 <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Every flag other than CASE_UPPER (including unknown values) means lower-casing.
        if ($case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // Iterate by value: the elements are only read, so a by-reference loop
        // would just leave a dangling reference behind.
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
340
341
    /**
     * Extracts the text located between two delimiters.
     *
     * The search for $start begins at $offset; $end is then searched for
     * behind the start delimiter. An empty string is returned when either
     * delimiter is missing or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // The fast path is chosen on the encoding exactly as passed in;
        // any other value is normalized and handled by the class helpers.
        $use_mbstring = $encoding === 'UTF-8';
        if (!$use_mbstring) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $delimiter_position = $use_mbstring
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($delimiter_position === false) {
            return '';
        }

        $delimiter_length = $use_mbstring
            ? \mb_strlen($start)
            : self::strlen($start, $encoding);
        $content_begin = $delimiter_position + (int) $delimiter_length;

        $content_end = $use_mbstring
            ? \mb_strpos($str, $end, $content_begin)
            : self::strpos($str, $end, $content_begin, $encoding);

        // No closing delimiter, or nothing between the two delimiters.
        if ($content_end === false || $content_end === $content_begin) {
            return '';
        }

        $content_length = $content_end - $content_begin;

        if ($use_mbstring) {
            return (string) \mb_substr($str, $content_begin, $content_length);
        }

        return (string) self::substr($str, $content_begin, $content_length, $encoding);
    }
406
407
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * The digits are decoded eight at a time instead of via base_convert(),
     * so inputs of any length are converted without precision loss and a
     * leading zero nibble (e.g. '1010' for "\n") no longer shifts the result.
     * Leading zeros are dropped before decoding and characters other than
     * "0" / "1" are ignored; an input without any "1" gives an empty string.
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // Keep only binary digits and strip leading zeros.
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to whole bytes so that every chunk is exactly one octet.
        $missing_bits = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing_bits) . $bits;

        $bytes = '';
        foreach (\str_split($bits, 8) as $octet) {
            $bytes .= \chr((int) \bindec($octet));
        }

        return $bytes;
    }
433
434
    /**
     * Provides the three-byte UTF-8 Byte Order Mark.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
450
451
    /**
     * Alias that forwards its arguments unchanged to UTF8::chr_map().
     *
     * @alias of UTF8::chr_map()
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped_chars = self::chr_map($callback, $str);

        return $mapped_chars;
    }
467
468
    /**
     * Fetches the single character found at a zero-based index.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Any charset other than UTF-8 is handled by the class's own substr().
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
488
489
    /**
     * Splits the string into a list of its individual characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        // NOTE(review): static analysis reports str_split() as possibly returning
        // nested arrays; the annotation below narrows that to plain strings.
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
504
505
    /**
     * Probes the environment once and records the findings in self::$SUPPORT.
     *
     * The first call fills the support table (and switches mbstring to UTF-8
     * when mbstring or the symfony polyfill is in use); later calls do nothing.
     *
     * @return true|null
     *                   <p>true when the check was performed by this call,
     *                   null when it had already been done before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // The detection runs only once per process.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // The polyfill provides "mb_internal_encoding()" as well, so set it there too.
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
561
562
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * Results are memoized per code point and encoding. Code points below
     * U+0080 come from the single-byte table, everything else is built via
     * "IntlChar" when available or by manual UTF-8 bit packing otherwise.
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (non-integer, zero, negative or above U+10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            (self::$SUPPORT['mbstring'] ?? null) === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // The parameter is untyped, so the runtime check is required; values
        // above U+10FFFF are not code points and cannot be encoded.
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point < 0x80) {
            // Only ASCII is a single byte in UTF-8. U+0080 must NOT take this
            // branch: the table yields the lone byte "\x80", which is invalid UTF-8.
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif ((self::$SUPPORT['intlChar'] ?? false) === true) {
            //
            // fallback via "IntlChar"
            //

            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
            if ($chr === null) {
                return null;
            }
        } else {
            //
            // fallback via vanilla php
            //

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            // Assemble the lead byte and continuation bytes from the byte table.
            if ($code_point <= 0x7FF) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // Every branch produced UTF-8; convert once if another charset was requested.
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
687
688
    /**
     * Runs a callback over every character of a string and collects the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>Invoked once for each character.</p>
     * @param string   $str      <p>UTF-8 string whose characters are handed to the callback.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The callback results, one entry per character.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        // split first, so the callback only ever sees one character at a time
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
708
709
    /**
     * Returns the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>One byte length per character; an empty array for an empty string.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $characters = self::str_split($str);

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \array_map('\strlen', $characters);
        }

        // "mb_" is available if overload is used, so measure with an 8-bit charset instead
        // (presumably because a plain strlen() would be overloaded as well)
        $byte_length = static function (string $data): int {
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };

        return \array_map($byte_length, $characters);
    }
744
745
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character. If more than one character is given,
     *                     only the first one is evaluated.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the character, or 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no code point: without this guard "unpack()" fails on the
        // empty iconv result and "$char[0]" is an invalid offset in the fallback below.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            // "V" needs four bytes (one little-endian 32-bit value) to unpack
            if ($chr_tmp !== false && isset($chr_tmp[3])) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        //
        // fallback: decode the UTF-8 byte sequence by hand
        //

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells how many bytes belong to the sequence;
        // strip its length marker so only the payload bits remain
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the six payload bits of every continuation byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
797
798
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>Forwarded to UTF8::int_to_hex().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the NUL entity is looked up like an empty character
        $character = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($character);

        return self::int_to_hex($code_point, $prefix);
    }
823
824
    /**
     * Alias of "UTF8::chr_to_decimal()".
     *
     * @param string $chr <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Whatever UTF8::chr_to_decimal() returns for the character.</p>
     *
     * @see        UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $code_point = self::chr_to_decimal($chr);

        return $code_point;
    }
840
841
    /**
     * Splits a string into chunks of characters and joins them with the given line ending.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        // "implode" only puts $end between the chunks, never after the last one
        return \implode($end, $chunks);
    }
859
860
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * The optional steps run in this fixed order: diamond question mark, invisible characters,
     * whitespace, MS Word characters, BOM.
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
     *                                                        in combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
     *                                                        question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                                        </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; stray bytes land in group 2 or 3
        // and vanish, because only "$1" is written back.
        $utf8_sequences = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8_sequences, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
942
943
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string (cast to string internally).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for an empty input.</p>
     */
    public static function cleanup($str): string
    {
        // init
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // flags for UTF8::clean(), named here because they are passed positionally
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;

        // remove all none UTF-8 symbols
        // && remove diamond question mark (�)
        // && remove invisible characters (e.g. "\0") - the default of UTF8::clean()
        // && remove BOM
        // && normalize whitespace chars (but keep non-breaking-spaces)
        return self::clean(
            $fixed,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark
        );
    }
980
981
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array if the input is empty or neither a string nor an array.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // a string is handled as the list of its characters
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $chars);

        if (!$use_u_style) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1042
1043
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
        } else {
            // no mbstring: use the library's own regex helper with the same pattern
            $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');
        }

        return \trim($collapsed);
    }
1064
1065
    /**
     * Returns count of characters used in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Forwarded to UTF8::str_split(); presumably set it to
     *                                        false, if you don't want to use "mb_" functions.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // one array entry per character
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($characters);
    }
1094
1095
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" strings, applied before invalid characters are stripped.</p>
     * @param bool     $strip_tags <p>If true, the string is passed through strip_tags().</p>
     * @param bool     $strtolower <p>If true, the string is passed through strtolower().</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $placeholder_count = 0;

        // Fallback: blank input gets a generated identifier, anything else is cleaned.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        $protect_double_underscore = !isset($filter['__']);
        if ($protect_double_underscore) {
            $str = \str_replace('__', '##', $str, $placeholder_count);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Replace temporary placeholder '##' with '__' only if the original
        // identifier contained '__'.
        if ($placeholder_count > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher (this pattern keeps them up to U+FFFF)
        // We strip out any character not in the above list.
        $invalid_characters = '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u';
        $str = (string) \preg_replace($invalid_characters, '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $invalid_starts = ['/^[0-9]/', '/^(-[0-9])|^(--)/'];
        $valid_starts = ['_', '__'];
        $str = (string) \preg_replace($invalid_starts, $valid_starts, $str);

        return \trim($str, '-');
    }
1170
1171
    /**
     * Remove css media-queries.
     *
     * @param string $str <p>CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The CSS without the matched "@media { ... }" blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // "U" makes every quantifier ungreedy, so a match ends at the first "} }" pair
        $media_query_block = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($media_query_block, '', $str);

        return (string) $stripped;
    }
1188
1189
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1203
1204
    /**
     * Converts an int value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        // build the numeric character reference and let the entity decoder resolve it
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
1223
1224
    /**
     * Decodes a MIME header field
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Charset of the decoded result; anything other than
     *                         'UTF-8' / 'CP850' is normalized first.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // the two most common values skip the normalization
        $is_known_encoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1245
1246
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * The flag is built from two "regional indicator symbols" (starting at U+1F1E6 for "A"),
     * one per letter of the country code. The code is only checked for its shape (two ASCII
     * letters), not against the list of assigned ISO 3166-1 codes.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE (case-insensitive)</p>
     *
     * @return string
     *                <p>Emoji or empty string on error (empty input, wrong length or non-letter characters).</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only the letters A-Z map onto regional indicator symbols: anything else
        // (digits, punctuation, multi-byte characters) would produce unrelated code points.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A
        $asciiOffset = 0x41; // "A"

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1274
1275
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key set that matches the way the string was encoded
        $emoji_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        $emoji_values = (array) self::$EMOJI_VALUES_CACHE;

        return (string) \str_replace($emoji_keys, $emoji_values, $str);
    }
1315
1316
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        $emoji_values = (array) self::$EMOJI_VALUES_CACHE;

        // pick the key set the emoji are replaced with
        $emoji_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace($emoji_values, $emoji_keys, $str);
    }
1356
1357
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the current encoding of the string is
     *                                              auto-detected and a detected encoding replaces $from_encoding;
     *                                              if nothing can be detected, the string is converted via
     *                                              UTF8::to_utf8().</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The converted string; the input is returned unchanged if it is empty, if no target
     *                encoding is given, if source and target encoding are equal or if no conversion succeeded.</p>
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // nothing to convert
        if ($to_encoding === '' || $str === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are used as they are, everything else is normalized first
        if (!\in_array($to_encoding, ['UTF-8', 'CP850'], true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, ['UTF-8', 'CP850'], true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        //
        // pseudo encodings: JSON, BASE64, HTML-ENTITIES
        // (as target they return right away, as source they are decoded
        // and the real source encoding is left to the auto-detection)
        //

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        //
        // find out the source encoding
        //

        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        //
        // conversions handled by the library itself
        //

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        //
        // everything else: "mbstring" first, "iconv" as fallback
        //

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            if ($converted) {
                \assert(\is_string($converted));

                return $converted;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $converted_by_iconv = @\iconv($from_encoding, $to_encoding, $str);
        if ($converted_by_iconv !== false) {
            return $converted_by_iconv;
        }

        // no conversion worked: hand the string back as it is
        return $str;
    }
1535
1536
    /**
     * Encode a string as a MIME header field (wrapper around "iconv_mime_encode()").
     *
     * The field name handed to "iconv_mime_encode()" is always an empty string.
     * Charsets other than "UTF-8" and "CP850" are passed through "normalize_encoding()" first.
     *
     * @param string $str               <p>The header value to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding (used as "scheme").</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed (used as "line-break-chars").</p>
     * @param int    $indent            [optional] <p>Set the max line length (used as "line-length").</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // these two charset names are used as-is, everything else gets normalized
        $charsets_used_as_is = ['UTF-8', 'CP850'];

        if (!\in_array($from_charset, $charsets_used_as_is, true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, $charsets_used_as_is, true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $options = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $options);
    }
1579
1580
    /**
     * Create an extract from a sentence; if the search-string was found, the extract tries to be centered around it.
     *
     * Cuts are only made at a space or a dot, and skipped text is marked with $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // for "UTF-8" the native "mb_*" functions are called directly, otherwise the class wrappers are used
        $use_mb = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // number of characters in the given text
        $count_chars = static function (string $text) use ($use_mb, $encoding): int {
            if ($use_mb) {
                return (int) \mb_strlen($text);
            }

            return (int) self::strlen($text, $encoding);
        };

        // part of the input string (string, or false if the underlying substr call fails)
        $cut = static function (int $start, int $size) use ($str, $use_mb, $encoding) {
            if ($use_mb) {
                return \mb_substr($str, $start, $size);
            }

            return self::substr($str, $start, $size, $encoding);
        };

        // position of the next space / dot at or after the offset,
        // note: if one of the two lookups fails, "min()" picks the "false" and the result is 0
        $next_break = static function (int $offset) use ($str, $use_mb, $encoding): int {
            if ($use_mb) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // without a search-string: only shorten the text from the right
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $end = \min($length - 1, $count_chars($str));
            }

            $pos = $next_break($end);
            if (!$pos) {
                return $str;
            }

            $str_sub = $cut(0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        if ($use_mb) {
            $word_position = (int) \mb_stripos($str, $search);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
        }
        $half_side = (int) ($word_position - $length / 2 + $count_chars($search) / 2);

        // start the extract at the last space / dot before the "half side" position
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $cut(0, $half_side);
            if ($half_text !== false) {
                if ($use_mb) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        $text_length = (int) self::strlen($str, $encoding);

        if ($word_position && $half_side > 0) {
            $offset = \min($pos_start + $length - 1, $text_length);
            $pos_end = $next_break($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no usable end position: take everything from the start position on
                $str_sub = $cut($pos_start, $count_chars($str));
                if ($str_sub === false) {
                    return '';
                }

                return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
            }

            $str_sub = $cut($pos_start, $pos_end);
            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // search-string not found (or found at the very beginning): only shorten the text from the right
        $offset = \min($length - 1, $text_length);
        $pos_end = $next_break($offset);
        if (!$pos_end) {
            return $str;
        }

        $str_sub = $cut(0, $pos_end);
        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }
1769
1770
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. Null-bytes and tags are removed and
     *                                        quotes are html-encoded before the name is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed through to "file_get_contents()" to trigger the include path
     *                                        search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If no context is given and $timeout is not 0,
     *                                        a context with this "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Sanitize the filename: strip null-bytes + tags and encode quotes.
        //
        // This replaces the former "FILTER_SANITIZE_STRING" filter, which is deprecated as of PHP 8.1
        // and raises an "E_DEPRECATED" on every call; the replacement below is the one recommended
        // by the PHP deprecation RFC.
        $filename_sanitized = \preg_replace('/\x00|<[^>]*>?/', '', $filename);
        if ($filename_sanitized === null) {
            return false;
        }
        $filename = \str_replace(["'", '"'], ['&#39;', '&#34;'], $filename_sanitized);

        // apply the timeout only if the caller did not provide an own stream context
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // the length argument is only forwarded if it was really given
        if (\is_int($max_length)) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8) {
            // convert everything that is not detected as binary, plus UTF-16 / UTF-32 data
            if (
                !self::is_binary($data, true)
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $from_encoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1872
1873
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * The whole file is read via "file_get_contents()" and then checked via "string_has_bom()".
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1896
1897
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * Arrays and objects are processed recursively (element by element), strings are normalized
     * and every other type is returned untouched.
     *
     * @param array|object|string $var
     * @param int                 $normalization_form <p>The form that is passed to the "Normalizer".</p>
     * @param string              $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // drop the reference, so that later writes can't touch the last element
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // non-empty marker: the string was already normalized
                    $n = '-';
                } else {
                    $n = \Normalizer::normalize($var, $normalization_form);

                    // use the normalized string if there is one, otherwise fall back to "encode()"
                    $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_char = $var[0] >= "\x80"
                                              &&
                                              isset($n[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_char) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1972
1973
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it,
     * the result is then passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        // the options are only forwarded if the caller really passed some
        if ($has_options) {
            $input = \filter_input($type, $variable_name, $filter, $options);
        } else {
            $input = \filter_input($type, $variable_name, $filter);
        }

        return self::filter($input);
    }
2028
2029
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them,
     * the result is then passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('foo' => 'bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int        $type       <p>
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                               <b>INPUT_ENV</b>.
     *                               </p>
     * @param array|null $definition [optional] <p>
     *                               An array defining the arguments. A valid key is a string
     *                               containing a variable name and a valid value is either a filter type, or an array
     *                               optionally specifying the filter, flags and options. If the value is an
     *                               array, valid keys are filter which specifies the
     *                               filter type,
     *                               flags which specifies any flags that apply to the
     *                               filter, and options which specifies any options that
     *                               apply to the filter.
     *                               </p>
     *                               <p>
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
     *                               input array are filtered by this filter.
     *                               </p>
     * @param bool       $add_empty  [optional] <p>
     *                               Add missing keys as <b>NULL</b> to the return value.
     *                               </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        // without a definition the native function is called with the input type only
        if ($has_definition) {
            $input = \filter_input_array($type, $definition, $add_empty);
        } else {
            $input = \filter_input_array($type);
        }

        return self::filter($input);
    }
2090
2091
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter,
     * the result is then passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                            'options' => array(
     *                                                'default' => 3, // value to return if the filter fails
     *                                                // other options here
     *                                                'min_range' => 0
     *                                            ),
     *                                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                            array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                            // Expected format: Surname, GivenNames
     *                                            if (strpos($value, ", ") === false) return false;
     *                                            list($surname, $givennames) = explode(", ", $value, 2);
     *                                            $empty = (empty($surname) || empty($givennames));
     *                                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                            if ($empty || $notstrings) {
     *                                                return false;
     *                                            } else {
     *                                                return $value;
     *                                            }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $options_were_passed = \func_num_args() >= 3;

        // the options are only forwarded if the caller passed a third argument
        $filtered = $options_were_passed
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
2170
2171
    /**
2172
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2173
     *
2174
     * Gets multiple variables and optionally filters them.
2175
     *
2176
     * EXAMPLE: <code>
2177
     * $filters = [
2178
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2179
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2180
     *     'email' => FILTER_VALIDATE_EMAIL,
2181
     * ];
2182
     *
2183
     * $data = [
2184
     *     'name' => 'κόσμε',
2185
     *     'age' => '18',
2186
     *     'email' => 'user@example.com'
2187
     * ];
2188
     *
2189
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
2190
     * </code>
2191
     *
2192
     * @see http://php.net/manual/en/function.filter-var-array.php
2193
     *
2194
     * @param array<mixed>   $data       <p>
2195
     *                                   An array with string keys containing the data to filter.
2196
     *                                   </p>
2197
     * @param array|int|null $definition [optional] <p>
2198
     *                                   An array defining the arguments. A valid key is a string
2199
     *                                   containing a variable name and a valid value is either a
2200
     *                                   filter type, or an
2201
     *                                   array optionally specifying the filter, flags and options.
2202
     *                                   If the value is an array, valid keys are filter
2203
     *                                   which specifies the filter type,
2204
     *                                   flags which specifies any flags that apply to the
2205
     *                                   filter, and options which specifies any options that
2206
     *                                   apply to the filter. See the example below for a better understanding.
2207
     *                                   </p>
2208
     *                                   <p>
2209
     *                                   This parameter can be also an integer holding a filter constant. Then all values
2210
     *                                   in the input array are filtered by this filter.
2211
     *                                   </p>
2212
     * @param bool           $add_empty  [optional] <p>
2213
     *                                   Add missing keys as <b>NULL</b> to the return value.
2214
     *                                   </p>
2215
     *
2216
     * @psalm-pure
2217
     *
2218
     * @return mixed
2219
     *               <p>
2220
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2221
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2222
     *               set.
2223
     *               </p>
2224
     */
2225 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // Forward the optional arguments only when the caller really supplied them;
        // otherwise the native function is called with the data alone.
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $only_data_given = \func_num_args() < 2;

        $filtered = $only_data_given
            ? \filter_var_array($data)
            : \filter_var_array($data, $definition, $add_empty);

        // Hand the native result to self::filter() before returning it.
        return self::filter($filtered);
    }
2241
2242
    /**
2243
     * Checks whether finfo is available on the server.
2244
     *
2245
     * @psalm-pure
2246
     *
2247
     * @return bool
2248
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2249
     *
2250
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
2251
     */
2252
    public static function finfo_loaded(): bool
    {
        // "finfo" counts as available as soon as its class can be found.
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
2256
2257
    /**
2258
     * Returns the first $n characters of the string.
2259
     *
2260
     * @param string $str      <p>The input string.</p>
2261
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2262
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2263
     *
2264
     * @psalm-pure
2265
     *
2266
     * @return string
2267
     */
2268 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // An empty input or a non-positive count always yields an empty string.
        $nothing_to_take = ($n <= 0 || $str === '');
        if ($nothing_to_take) {
            return '';
        }

        // UTF-8 is handled directly by mb_substr() (called without an explicit encoding);
        // every other encoding is delegated to self::substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2283
2284
    /**
2285
     * Check if the number of Unicode characters isn't greater than the specified integer.
2286
     *
2287
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // false</code>
2288
     *
2289
     * @param string $str      the original string to be checked
2290
     * @param int    $box_size the size in number of chars to be checked against string
2291
     *
2292
     * @psalm-pure
2293
     *
2294
     * @return bool
2295
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2296
     */
2297 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by self::strlen(), coerced to an integer.
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2301
2302
    /**
2303
     * Try to fix simple broken UTF-8 strings.
2304
     *
2305
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2306
     *
2307
     * EXAMPLE: <code>UTF8::fix_simple_utf8('Düsseldorf'); // 'Düsseldorf'</code>
2308
     *
2309
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2310
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2311
     * See: http://en.wikipedia.org/wiki/Windows-1252
2312
     *
2313
     * @param string $str <p>The input string</p>
2314
     *
2315
     * @psalm-pure
2316
     *
2317
     * @return string
2318
     */
2319 47
    public static function fix_simple_utf8(string $str): string
    {
        /**
         * Search strings for str_replace(), filled on first use.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * Replacement strings for str_replace(), filled on first use.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        if ($str === '') {
            return '';
        }

        if ($search_cache === null) {
            // Load the "utf8_fix" data only if no other code path has loaded it yet.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // The array keys are the search strings, the array itself supplies
            // the replacements in the same order.
            $replace_cache = self::$BROKEN_UTF8_FIX;
            $search_cache = \array_keys($replace_cache);
        }

        \assert(\is_array($replace_cache));

        return \str_replace($search_cache, $replace_cache, $str);
    }
2352
2353
    /**
2354
     * Fix a double (or multiple) encoded UTF8 string.
2355
     *
2356
     * EXAMPLE: <code>UTF8::fix_utf8('Fédération'); // 'Fédération'</code>
2357
     *
2358
     * @param string|string[] $str you can use a string or an array of strings
2359
     *
2360
     * @psalm-pure
2361
     *
2362
     * @return string|string[]
2363
     *                         Will return the fixed input-"array" or
2364
     *                         the fixed input-"string"
2365
     *
2366
     * @psalm-suppress InvalidReturnType
2367
     */
2368 2
    public static function fix_utf8($str)
    {
        // Arrays are processed element by element (recursively); keys are kept.
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Repeat the decode / re-encode round trip until the string stops changing.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2399
2400
    /**
2401
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2402
     *
2403
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2404
     *
2405
     * @param string $char
2406
     *
2407
     * @psalm-pure
2408
     *
2409
     * @return string
2410
     *                <p>'RTL' or 'LTR'.</p>
2411
     */
2412 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltr_codes = [0, 11, 12, 20];
            $rtl_codes = [1, 13, 14, 15, 21];

            if (\in_array($intl_direction, $ltr_codes, true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $rtl_codes, true)) {
                return 'RTL';
            }

            // Any other direction code falls through to the code point tables below.
        }

        $c = static::chr_to_decimal($char);

        // Everything outside of 0x5be..0x10b7f is reported as left-to-right.
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // Individual right-to-left code points (compared strictly).
        $rtl_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtl_code_points, true)) {
            return 'RTL';
        }

        // Inclusive right-to-left ranges: [first code point, last code point].
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2518
2519
    /**
2520
     * Check for php-support.
2521
     *
2522
     * @param string|null $key
2523
     *
2524
     * @psalm-pure
2525
     *
2526
     * @return mixed
2527
     *               Return the full support-"array", if $key === null<br>
2528
     *               return bool-value, if $key is used and available<br>
2529
     *               otherwise return <strong>null</strong>
2530
     */
2531 27
    public static function getSupportInfo(string $key = null)
    {
        // Without a key the complete support-array is returned as it is.
        if ($key === null) {
            return self::$SUPPORT;
        }

        // Load the transliterator list on demand.
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        // Unknown (or null-valued) entries are reported as null.
        if (!isset(self::$SUPPORT[$key])) {
            return null;
        }

        return self::$SUPPORT[$key];
    }
2545
2546
    /**
2547
     * Warning: this method only works for some file-types (png, jpg)
2548
     *          if you need more supported types, please use e.g. "finfo"
2549
     *
2550
     * @param string $str
2551
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
2552
     *
2553
     * @psalm-pure
2554
     *
2555
     * @return null[]|string[]
2556
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
2557
     *
2558
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
2559
     */
2560 40
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // Only the first two bytes are inspected; shorter input cannot be classified.
        /** @var false|string $signature - needed for PhpStan (stubs error) */
        $signature = \substr($str, 0, 2);
        if ($signature === false || \strlen($signature) !== 2) {
            return $fallback;
        }

        /** @var array|false $bytes - needed for PhpStan (stubs error) */
        $bytes = \unpack('C2chars', $signature);
        if ($bytes === false) {
            return $fallback;
        }

        // The decimal values of both bytes are concatenated and read as one integer,
        // e.g. the bytes 255 and 216 become 255216.
        /** @noinspection OffsetOperationsInspection */
        $type_code = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //

        // bytes 255 + 216 (0xFF 0xD8)
        if ($type_code === 255216) {
            return [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ];
        }

        // bytes 137 + 80 (0x89 "P")
        if ($type_code === 13780) {
            return [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ];
        }

        return $fallback;
    }
2627
2628
    /**
2629
     * @param int    $length         <p>Length of the random string.</p>
2630
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2631
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2632
     *
2633
     * @return string
2634
     */
2635 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The UTF-8 case uses the mb_* functions directly, every other encoding
        // is normalized first and then handled by the class' own helpers.
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        // No characters to choose from.
        if ($pool_size === 0) {
            return '';
        }

        $result = '';
        $taken = 0;

        //
        // add random chars
        //
        while ($taken < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                // fall back to mt_rand() if random_int() throws
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $is_utf8
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // A failed extraction does not count; the loop simply tries again.
            if ($char !== false) {
                $result .= $char;
                ++$taken;
            }
        }

        return $result;
    }
2692
2693
    /**
2694
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
2695
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2696
     *
2697
     * @return string
2698
     */
2699 1
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $seed = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // fall back to mt_rand() if random_int() throws
            /** @noinspection RandomApiMigrationInspection */
            $seed = \mt_rand(0, \mt_getrandmax());
        }

        // Random number + session id + request addresses (if set) + caller supplied entropy.
        $entropy = $seed
            . \session_id()
            . ($_SERVER['REMOTE_ADDR'] ?? '')
            . ($_SERVER['SERVER_ADDR'] ?? '')
            . $extra_entropy;

        $unique = \uniqid($entropy, true);

        // Optionally return the md5-hash of the identifier instead of the raw value.
        return $use_md5 ? \md5($unique . $entropy) : $unique;
    }
2722
2723
    /**
2724
     * alias for "UTF8::string_has_bom()"
2725
     *
2726
     * @param string $str
2727
     *
2728
     * @psalm-pure
2729
     *
2730
     * @return bool
2731
     *
2732
     * @see        UTF8::string_has_bom()
2733
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
2734
     */
2735 2
    public static function hasBom(string $str): bool
    {
        // Deprecated alias: the work is done by string_has_bom().
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2739
2740
    /**
2741
     * Returns true if the string contains a lower case char, false otherwise.
2742
     *
2743
     * @param string $str <p>The input string.</p>
2744
     *
2745
     * @psalm-pure
2746
     *
2747
     * @return bool
2748
     *              <p>Whether or not the string contains a lower case character.</p>
2749
     */
2750 47
    public static function has_lowercase(string $str): bool
    {
        // Anything, followed by one character of the POSIX class "lower".
        $pattern = '.*[[:lower:]]';

        // Without mbstring the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2759
2760
    /**
2761
     * Returns true if the string contains whitespace, false otherwise.
2762
     *
2763
     * @param string $str <p>The input string.</p>
2764
     *
2765
     * @psalm-pure
2766
     *
2767
     * @return bool
2768
     *              <p>Whether or not the string contains whitespace.</p>
2769
     */
2770 11
    public static function has_whitespace(string $str): bool
    {
        // Anything, followed by one character of the POSIX class "space".
        $pattern = '.*[[:space:]]';

        // Without mbstring the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2779
2780
    /**
2781
     * Returns true if the string contains an upper case char, false otherwise.
2782
     *
2783
     * @param string $str <p>The input string.</p>
2784
     *
2785
     * @psalm-pure
2786
     *
2787
     * @return bool
2788
     *              <p>Whether or not the string contains an upper case character.</p>
2789
     */
2790 12
    public static function has_uppercase(string $str): bool
    {
        // Anything, followed by one character of the POSIX class "upper".
        $pattern = '.*[[:upper:]]';

        // Without mbstring the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2799
2800
    /**
2801
     * Converts a hexadecimal value into a UTF-8 character.
2802
     *
2803
     * INFO: opposite to UTF8::chr_to_hex()
2804
     *
2805
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
2806
     *
2807
     * @param string $hexdec <p>The hexadecimal value.</p>
2808
     *
2809
     * @psalm-pure
2810
     *
2811
     * @return false|string one single UTF-8 character
2812
     */
2813 4
    public static function hex_to_chr(string $hexdec)
    {
        // Convert the hexadecimal notation to its integer value first;
        // notices about ignored invalid characters are silenced on purpose.
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2818
2819
    /**
2820
     * Converts hexadecimal U+xxxx code point representation to integer.
2821
     *
2822
     * INFO: opposite to UTF8::int_to_hex()
2823
     *
2824
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
2825
     *
2826
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2827
     *
2828
     * @psalm-pure
2829
     *
2830
     * @return false|int
2831
     *                   <p>The code point, or false on failure.</p>
2832
     */
2833 2
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Accepted form: an optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
        //
        // Only real hex digits (0-9, a-f, A-F) may match here: with a wider letter class,
        // input such as "zzzz" would pass the check and intval() would silently turn it
        // into 0 (or a truncated value) instead of reporting a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2848
2849
    /**
2850
     * alias for "UTF8::html_entity_decode()"
2851
     *
2852
     * @param string   $str
2853
     * @param int|null $flags
2854
     * @param string   $encoding
2855
     *
2856
     * @psalm-pure
2857
     *
2858
     * @return string
2859
     *
2860
     * @see        UTF8::html_entity_decode()
2861
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
2862
     */
2863 2
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Deprecated alias: all arguments are forwarded unchanged.
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2870
2871
    /**
2872
     * Converts a UTF-8 string to a series of HTML numbered entities.
2873
     *
2874
     * INFO: opposite to UTF8::html_decode()
2875
     *
2876
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
2877
     *
2878
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2879
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2880
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2881
     *
2882
     * @psalm-pure
2883
     *
2884
     * @return string HTML numbered entities
2885
     */
2886 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // convmap = [first code point, last code point, offset, mask]
            //
            // The range has to reach 0x10ffff (the last Unicode code point) and the mask
            // has to keep all 21 bits; a map that ends at 0xfffff would leave the
            // code points 0x100000..0x10ffff unencoded.
            $convmap = [
                $keep_ascii_chars ? 0x80 : 0x00,
                0x10ffff,
                0,
                0x1fffff,
            ];

            if ($encoding === 'UTF-8') {
                // UTF-8: first try without an explicit encoding argument.
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $convmap);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_single_char = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        return \implode('', \array_map($encode_single_char, self::str_split($str)));
    }
2943
2944
    /**
2945
     * UTF-8 version of html_entity_decode()
2946
     *
2947
     * The reason we are not using html_entity_decode() by itself is because
2948
     * while it is not technically correct to leave out the semicolon
2949
     * at the end of an entity most browsers will still interpret the entity
2950
     * correctly. html_entity_decode() does not convert entities without
2951
     * semicolons, so we are left with our own little solution here. Bummer.
2952
     *
2953
     * Convert all HTML entities to their applicable characters.
2954
     *
2955
     * INFO: opposite to UTF8::html_encode()
2956
     *
2957
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2958
     *
2959
     * @see http://php.net/manual/en/function.html-entity-decode.php
2960
     *
2961
     * @param string   $str      <p>
2962
     *                           The input string.
2963
     *                           </p>
2964
     * @param int|null $flags    [optional] <p>
2965
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2966
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2967
     *                           <table>
2968
     *                           Available <i>flags</i> constants
2969
     *                           <tr valign="top">
2970
     *                           <td>Constant Name</td>
2971
     *                           <td>Description</td>
2972
     *                           </tr>
2973
     *                           <tr valign="top">
2974
     *                           <td><b>ENT_COMPAT</b></td>
2975
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2976
     *                           </tr>
2977
     *                           <tr valign="top">
2978
     *                           <td><b>ENT_QUOTES</b></td>
2979
     *                           <td>Will convert both double and single quotes.</td>
2980
     *                           </tr>
2981
     *                           <tr valign="top">
2982
     *                           <td><b>ENT_NOQUOTES</b></td>
2983
     *                           <td>Will leave both double and single quotes unconverted.</td>
2984
     *                           </tr>
2985
     *                           <tr valign="top">
2986
     *                           <td><b>ENT_HTML401</b></td>
2987
     *                           <td>
2988
     *                           Handle code as HTML 4.01.
2989
     *                           </td>
2990
     *                           </tr>
2991
     *                           <tr valign="top">
2992
     *                           <td><b>ENT_XML1</b></td>
2993
     *                           <td>
2994
     *                           Handle code as XML 1.
2995
     *                           </td>
2996
     *                           </tr>
2997
     *                           <tr valign="top">
2998
     *                           <td><b>ENT_XHTML</b></td>
2999
     *                           <td>
3000
     *                           Handle code as XHTML.
3001
     *                           </td>
3002
     *                           </tr>
3003
     *                           <tr valign="top">
3004
     *                           <td><b>ENT_HTML5</b></td>
3005
     *                           <td>
3006
     *                           Handle code as HTML 5.
3007
     *                           </td>
3008
     *                           </tr>
3009
     *                           </table>
3010
     *                           </p>
3011
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3012
     *
3013
     * @psalm-pure
3014
     *
3015
     * @return string the decoded string
3016
     */
3017 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Strings shorter than four bytes (examples: "&;" or "&x;") and strings
        // without any "&" are returned untouched.
        $too_short = !isset($str[3]);
        if ($too_short || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // Warn if the encoding is none of the three listed below and mbstring is missing.
        $is_listed_encoding = $encoding === 'UTF-8'
                              || $encoding === 'ISO-8859-1'
                              || $encoding === 'WINDOWS-1252';
        if (!$is_listed_encoding && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly until a pass no longer changes the string.
        while (true) {
            // Without any "&" left there is nothing more to decode.
            if (\strpos($str, '&') === false) {
                break;
            }

            $before_pass = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (numeric entities without a trailing ";" get one appended first)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($before_pass === $str) {
                break;
            }
        }

        return $str;
    }
3077
3078
    /**
3079
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
3080
     *
3081
     * @param string $str
3082
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3083
     *
3084
     * @psalm-pure
3085
     *
3086
     * @return string
3087
     */
3088 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Convert both quote styles and substitute invalid code unit sequences.
        $escape_flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escape_flags, $encoding);
    }
3096
3097
    /**
3098
     * Remove empty html-tag.
3099
     *
3100
     * e.g.: <pre><tag></tag></pre>
3101
     *
3102
     * @param string $str
3103
     *
3104
     * @psalm-pure
3105
     *
3106
     * @return string
3107
     */
3108 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // Matches an opening tag (without "/" inside of it) that is followed,
        // optionally after whitespace, by a closing tag.
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on a PCRE error (e.g. malformed UTF-8 in combination
        // with the "u" modifier). Casting that to string would silently throw the whole
        // input away, so the untouched input is returned instead.
        return $stripped ?? $str;
    }
3116
3117
    /**
3118
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3119
     *
3120
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3121
     *
3122
     * @see http://php.net/manual/en/function.htmlentities.php
3123
     *
3124
     * @param string $str           <p>
3125
     *                              The input string.
3126
     *                              </p>
3127
     * @param int    $flags         [optional] <p>
3128
     *                              A bitmask of one or more of the following flags, which specify how to handle
3129
     *                              quotes, invalid code unit sequences and the used document type. The default is
3130
     *                              ENT_COMPAT | ENT_HTML401.
3131
     *                              <table>
3132
     *                              Available <i>flags</i> constants
3133
     *                              <tr valign="top">
3134
     *                              <td>Constant Name</td>
3135
     *                              <td>Description</td>
3136
     *                              </tr>
3137
     *                              <tr valign="top">
3138
     *                              <td><b>ENT_COMPAT</b></td>
3139
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3140
     *                              </tr>
3141
     *                              <tr valign="top">
3142
     *                              <td><b>ENT_QUOTES</b></td>
3143
     *                              <td>Will convert both double and single quotes.</td>
3144
     *                              </tr>
3145
     *                              <tr valign="top">
3146
     *                              <td><b>ENT_NOQUOTES</b></td>
3147
     *                              <td>Will leave both double and single quotes unconverted.</td>
3148
     *                              </tr>
3149
     *                              <tr valign="top">
3150
     *                              <td><b>ENT_IGNORE</b></td>
3151
     *                              <td>
3152
     *                              Silently discard invalid code unit sequences instead of returning
3153
     *                              an empty string. Using this flag is discouraged as it
3154
     *                              may have security implications.
3155
     *                              </td>
3156
     *                              </tr>
3157
     *                              <tr valign="top">
3158
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3159
     *                              <td>
3160
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3161
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3162
     *                              string.
3163
     *                              </td>
3164
     *                              </tr>
3165
     *                              <tr valign="top">
3166
     *                              <td><b>ENT_DISALLOWED</b></td>
3167
     *                              <td>
3168
     *                              Replace invalid code points for the given document type with a
3169
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3170
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3171
     *                              instance, to ensure the well-formedness of XML documents with
3172
     *                              embedded external content.
3173
     *                              </td>
3174
     *                              </tr>
3175
     *                              <tr valign="top">
3176
     *                              <td><b>ENT_HTML401</b></td>
3177
     *                              <td>
3178
     *                              Handle code as HTML 4.01.
3179
     *                              </td>
3180
     *                              </tr>
3181
     *                              <tr valign="top">
3182
     *                              <td><b>ENT_XML1</b></td>
3183
     *                              <td>
3184
     *                              Handle code as XML 1.
3185
     *                              </td>
3186
     *                              </tr>
3187
     *                              <tr valign="top">
3188
     *                              <td><b>ENT_XHTML</b></td>
3189
     *                              <td>
3190
     *                              Handle code as XHTML.
3191
     *                              </td>
3192
     *                              </tr>
3193
     *                              <tr valign="top">
3194
     *                              <td><b>ENT_HTML5</b></td>
3195
     *                              <td>
3196
     *                              Handle code as HTML 5.
3197
     *                              </td>
3198
     *                              </tr>
3199
     *                              </table>
3200
     *                              </p>
3201
     * @param string $encoding      [optional] <p>
3202
     *                              Like <b>htmlspecialchars</b>,
3203
     *                              <b>htmlentities</b> takes an optional third argument
3204
     *                              <i>encoding</i> which defines encoding used in
3205
     *                              conversion.
3206
     *                              Although this argument is technically optional, you are highly
3207
     *                              encouraged to specify the correct value for your code.
3208
     *                              </p>
3209
     * @param bool   $double_encode [optional] <p>
3210
     *                              When <i>double_encode</i> is turned off PHP will not
3211
     *                              encode existing html entities. The default is to convert everything.
3212
     *                              </p>
3213
     *
3214
     * @psalm-pure
3215
     *
3216
     * @return string
3217
     *                <p>
3218
     *                The encoded string.
3219
     *                <br><br>
3220
     *                If the input <i>string</i> contains an invalid code unit
3221
     *                sequence within the given <i>encoding</i> an empty string
3222
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3223
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3224
     *                </p>
3225
     */
3226 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * PHP does not convert a backslash into its html entity, because backslashes are
         * mostly used to escape characters before inserting them into a database. With a
         * decent database layer that is not needed, so every backslash is replaced by
         * its html entity equivalent here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        return self::html_encode($encoded, true, $encoding);
    }
3255
3256
    /**
3257
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3258
     *
3259
     * INFO: Take a look at "UTF8::htmlentities()"
3260
     *
3261
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3262
     *
3263
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3264
     *
3265
     * @param string $str           <p>
3266
     *                              The string being converted.
3267
     *                              </p>
3268
     * @param int    $flags         [optional] <p>
3269
     *                              A bitmask of one or more of the following flags, which specify how to handle
3270
     *                              quotes, invalid code unit sequences and the used document type. The default is
3271
     *                              ENT_COMPAT | ENT_HTML401.
3272
     *                              <table>
3273
     *                              Available <i>flags</i> constants
3274
     *                              <tr valign="top">
3275
     *                              <td>Constant Name</td>
3276
     *                              <td>Description</td>
3277
     *                              </tr>
3278
     *                              <tr valign="top">
3279
     *                              <td><b>ENT_COMPAT</b></td>
3280
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3281
     *                              </tr>
3282
     *                              <tr valign="top">
3283
     *                              <td><b>ENT_QUOTES</b></td>
3284
     *                              <td>Will convert both double and single quotes.</td>
3285
     *                              </tr>
3286
     *                              <tr valign="top">
3287
     *                              <td><b>ENT_NOQUOTES</b></td>
3288
     *                              <td>Will leave both double and single quotes unconverted.</td>
3289
     *                              </tr>
3290
     *                              <tr valign="top">
3291
     *                              <td><b>ENT_IGNORE</b></td>
3292
     *                              <td>
3293
     *                              Silently discard invalid code unit sequences instead of returning
3294
     *                              an empty string. Using this flag is discouraged as it
3295
     *                              may have security implications.
3296
     *                              </td>
3297
     *                              </tr>
3298
     *                              <tr valign="top">
3299
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3300
     *                              <td>
3301
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3302
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3303
     *                              string.
3304
     *                              </td>
3305
     *                              </tr>
3306
     *                              <tr valign="top">
3307
     *                              <td><b>ENT_DISALLOWED</b></td>
3308
     *                              <td>
3309
     *                              Replace invalid code points for the given document type with a
3310
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3311
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3312
     *                              instance, to ensure the well-formedness of XML documents with
3313
     *                              embedded external content.
3314
     *                              </td>
3315
     *                              </tr>
3316
     *                              <tr valign="top">
3317
     *                              <td><b>ENT_HTML401</b></td>
3318
     *                              <td>
3319
     *                              Handle code as HTML 4.01.
3320
     *                              </td>
3321
     *                              </tr>
3322
     *                              <tr valign="top">
3323
     *                              <td><b>ENT_XML1</b></td>
3324
     *                              <td>
3325
     *                              Handle code as XML 1.
3326
     *                              </td>
3327
     *                              </tr>
3328
     *                              <tr valign="top">
3329
     *                              <td><b>ENT_XHTML</b></td>
3330
     *                              <td>
3331
     *                              Handle code as XHTML.
3332
     *                              </td>
3333
     *                              </tr>
3334
     *                              <tr valign="top">
3335
     *                              <td><b>ENT_HTML5</b></td>
3336
     *                              <td>
3337
     *                              Handle code as HTML 5.
3338
     *                              </td>
3339
     *                              </tr>
3340
     *                              </table>
3341
     *                              </p>
3342
     * @param string $encoding      [optional] <p>
3343
     *                              Defines encoding used in conversion.
3344
     *                              </p>
3345
     *                              <p>
3346
     *                              For the purposes of this function, the encodings
3347
     *                              ISO-8859-1, ISO-8859-15,
3348
     *                              UTF-8, cp866,
3349
     *                              cp1251, cp1252, and
3350
     *                              KOI8-R are effectively equivalent, provided the
3351
     *                              <i>string</i> itself is valid for the encoding, as
3352
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3353
     *                              the same positions in all of these encodings.
3354
     *                              </p>
3355
     * @param bool   $double_encode [optional] <p>
3356
     *                              When <i>double_encode</i> is turned off PHP will not
3357
     *                              encode existing html entities, the default is to convert everything.
3358
     *                              </p>
3359
     *
3360
     * @psalm-pure
3361
     *
3362
     * @return string the converted string.
3363
     *                </p>
3364
     *                <p>
3365
     *                If the input <i>string</i> contains an invalid code unit
3366
     *                sequence within the given <i>encoding</i> an empty string
3367
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3368
     *                <b>ENT_SUBSTITUTE</b> flags are set
3369
     */
3370 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as they are, everything else gets normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
3387
3388
    /**
3389
     * Checks whether iconv is available on the server.
3390
     *
3391
     * @psalm-pure
3392
     *
3393
     * @return bool
3394
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3395
     *
3396
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3397
     */
3398
    public static function iconv_loaded(): bool
    {
        // Ask PHP directly whether the "iconv" extension is loaded.
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3402
3403
    /**
3404
     * alias for "UTF8::decimal_to_chr()"
3405
     *
3406
     * @param int|string $int
3407
     *
3408
     * @phpstan-param int|numeric-string $int
3409
     *
3410
     * @psalm-pure
3411
     *
3412
     * @return string
3413
     *
3414
     * @see        UTF8::decimal_to_chr()
3415
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
3416
     */
3417 4
    public static function int_to_chr($int): string
    {
        // Deprecated alias: the value is forwarded unchanged to decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3421
3422
    /**
3423
     * Converts Integer to hexadecimal U+xxxx code point representation.
3424
     *
3425
     * INFO: opposite to UTF8::hex_to_int()
3426
     *
3427
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3428
     *
3429
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3430
     * @param string $prefix [optional]
3431
     *
3432
     * @psalm-pure
3433
     *
3434
     * @return string the prefixed hexadecimal code point, left-padded with zeros to at least four digits
3435
     */
3436 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros up to four hex digits; longer values are kept as they are.
        $hex_digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $hex_digits;
    }
3444
3445
    /**
3446
     * Checks whether intl-char is available on the server.
3447
     *
3448
     * @psalm-pure
3449
     *
3450
     * @return bool
3451
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3452
     *
3453
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3454
     */
3455
    public static function intlChar_loaded(): bool
    {
        // The check is done via the existence of the "IntlChar" class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3459
3460
    /**
3461
     * Checks whether intl is available on the server.
3462
     *
3463
     * @psalm-pure
3464
     *
3465
     * @return bool
3466
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3467
     *
3468
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3469
     */
3470 5
    public static function intl_loaded(): bool
    {
        // Ask PHP directly whether the "intl" extension is loaded.
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3474
3475
    /**
3476
     * alias for "UTF8::is_ascii()"
3477
     *
3478
     * @param string $str
3479
     *
3480
     * @psalm-pure
3481
     *
3482
     * @return bool
3483
     *
3484
     * @see        UTF8::is_ascii()
3485
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
3486
     */
3487 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: the check itself is done by ASCII::is_ascii().
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3491
3492
    /**
3493
     * alias for "UTF8::is_base64()"
3494
     *
3495
     * @param string $str
3496
     *
3497
     * @psalm-pure
3498
     *
3499
     * @return bool
3500
     *
3501
     * @see        UTF8::is_base64()
3502
     * @deprecated <p>please use "UTF8::is_base64()"</p>
3503
     */
3504 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: forwards to is_base64() with its default options.
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3508
3509
    /**
3510
     * alias for "UTF8::is_binary()"
3511
     *
3512
     * @param int|string $str
3513
     * @param bool       $strict
3514
     *
3515
     * @psalm-pure
3516
     *
3517
     * @return bool
3518
     *
3519
     * @see        UTF8::is_binary()
3520
     * @deprecated <p>please use "UTF8::is_binary()"</p>
3521
     */
3522 4
    public static function isBinary($str, bool $strict = false): bool
    {
        // Deprecated alias: both arguments are forwarded unchanged to is_binary().
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3526
3527
    /**
3528
     * alias for "UTF8::is_bom()"
3529
     *
3530
     * @param string $utf8_chr
3531
     *
3532
     * @psalm-pure
3533
     *
3534
     * @return bool
3535
     *
3536
     * @see        UTF8::is_bom()
3537
     * @deprecated <p>please use "UTF8::is_bom()"</p>
3538
     */
3539 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: forwards to is_bom().
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3543
3544
    /**
3545
     * alias for "UTF8::is_html()"
3546
     *
3547
     * @param string $str
3548
     *
3549
     * @psalm-pure
3550
     *
3551
     * @return bool
3552
     *
3553
     * @see        UTF8::is_html()
3554
     * @deprecated <p>please use "UTF8::is_html()"</p>
3555
     */
3556 2
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: forwards to is_html().
        $is_html = self::is_html($str);

        return $is_html;
    }
3560
3561
    /**
3562
     * alias for "UTF8::is_json()"
3563
     *
3564
     * @param string $str
3565
     *
3566
     * @return bool
3567
     *
3568
     * @see        UTF8::is_json()
3569
     * @deprecated <p>please use "UTF8::is_json()"</p>
3570
     */
3571 1
    public static function isJson(string $str): bool
    {
        // Deprecated alias: forwards to is_json() with its default options.
        $is_json = self::is_json($str);

        return $is_json;
    }
3575
3576
    /**
3577
     * alias for "UTF8::is_utf16()"
3578
     *
3579
     * @param string $str
3580
     *
3581
     * @psalm-pure
3582
     *
3583
     * @return false|int
3584
     *                   <strong>false</strong> if it's not UTF-16,<br>
3585
     *                   <strong>1</strong> for UTF-16LE,<br>
3586
     *                   <strong>2</strong> for UTF-16BE
3587
     *
3588
     * @see        UTF8::is_utf16()
3589
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
3590
     */
3591 2
    public static function isUtf16($str)
    {
        // Deprecated alias: the result of is_utf16() is passed through as it is.
        $utf16_result = self::is_utf16($str);

        return $utf16_result;
    }
3595
3596
    /**
3597
     * alias for "UTF8::is_utf32()"
3598
     *
3599
     * @param string $str
3600
     *
3601
     * @psalm-pure
3602
     *
3603
     * @return false|int
3604
     *                   <strong>false</strong> if it's not UTF-32,
3605
     *                   <strong>1</strong> for UTF-32LE,
3606
     *                   <strong>2</strong> for UTF-32BE
3607
     *
3608
     * @see        UTF8::is_utf32()
3609
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
3610
     */
3611 2
    public static function isUtf32($str)
    {
        // Deprecated alias: the result of is_utf32() is passed through as it is.
        $utf32_result = self::is_utf32($str);

        return $utf32_result;
    }
3615
3616
    /**
3617
     * alias for "UTF8::is_utf8()"
3618
     *
3619
     * @param string $str
3620
     * @param bool   $strict
3621
     *
3622
     * @psalm-pure
3623
     *
3624
     * @return bool
3625
     *
3626
     * @see        UTF8::is_utf8()
3627
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
3628
     */
3629 17
    public static function isUtf8($str, bool $strict = false): bool
    {
        // Deprecated alias: both arguments are forwarded unchanged to is_utf8().
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3633
3634
    /**
3635
     * Returns true if the string contains only alphabetic chars, false otherwise.
3636
     *
3637
     * @param string $str <p>The input string.</p>
3638
     *
3639
     * @psalm-pure
3640
     *
3641
     * @return bool
3642
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3643
     */
3644 10
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring support the check is delegated to str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3653
3654
    /**
3655
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3656
     *
3657
     * @param string $str <p>The input string.</p>
3658
     *
3659
     * @psalm-pure
3660
     *
3661
     * @return bool
3662
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3663
     */
3664 13
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring support the check is delegated to str_matches_pattern().
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3673
3674
    /**
3675
     * Returns true if the string contains only punctuation chars, false otherwise.
3676
     *
3677
     * @param string $str <p>The input string.</p>
3678
     *
3679
     * @psalm-pure
3680
     *
3681
     * @return bool
3682
     *              <p>Whether or not $str contains only punctuation chars.</p>
3683
     */
3684 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation chars and nothing else.
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3688
3689
    /**
3690
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3691
     *
3692
     * @param string $str                       <p>The input string.</p>
3693
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3694
     *
3695
     * @psalm-pure
3696
     *
3697
     * @return bool
3698
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3699
     */
3700 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string is printable if removing the invisible characters does not change it.
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $cleaned === $str;
    }
3704
3705
    /**
3706
     * Checks if a string is 7 bit ASCII.
3707
     *
3708
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3709
     *
3710
     * @param string $str <p>The string to check.</p>
3711
     *
3712
     * @psalm-pure
3713
     *
3714
     * @return bool
3715
     *              <p>
3716
     *              <strong>true</strong> if it is ASCII<br>
3717
     *              <strong>false</strong> otherwise
3718
     *              </p>
3719
     */
3720 8
    public static function is_ascii(string $str): bool
    {
        // The check itself is done by ASCII::is_ascii().
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3724
3725
    /**
3726
     * Returns true if the string is base64 encoded, false otherwise.
3727
     *
3728
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3729
     *
3730
     * @param string|null $str                   <p>The input string.</p>
3731
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3732
     *
3733
     * @psalm-pure
3734
     *
3735
     * @return bool
3736
     *              <p>Whether or not $str is base64 encoded.</p>
3737
     */
3738 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // An empty string only counts as base64 if the caller explicitly allows it.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Anything that is not a string (e.g. null) cannot be base64.
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: encoding the decoded data again must give back the exact input.
        return \base64_encode($decoded) === $str;
    }
3756
3757
    /**
3758
     * Check if the input is binary... (it looks like a hack).
3759
     *
3760
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3761
     *
3762
     * @param int|string $input
3763
     * @param bool       $strict
3764
     *
3765
     * @psalm-pure
3766
     *
3767
     * @return bool
3768
     */
3769 40
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;

        // An empty input is never treated as binary.
        if ($input === '') {
            return false;
        }

        // A string that only consists of "0" and "1" counts as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        // Use the type that get_file_type() reports for the input.
        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than a quarter of NUL bytes counts as binary as well.
        $byte_length = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_length);
        if (($null_byte_count / $byte_length) > 0.25) {
            return true;
        }

        // The fileinfo based check below is only done in strict mode.
        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $mime_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mime_encoding === 'binary';
    }
3808
3809
    /**
     * Check if the file is binary, based on (at most) its first 512 bytes.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file <p>The path that is handed over to fopen().</p>
     *
     * @return bool
     *              <p><strong>false</strong> if the file can not be opened or nothing could be read,
     *              otherwise the result of UTF8::is_binary() in strict mode.</p>
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        // only the beginning of the file is inspected
        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === '' || $head === false) {
            return false;
        }

        return self::is_binary($head, true);
    }
3835
3836
    /**
     * Check whether the string consists of whitespace chars only (an empty string matches as well).
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3855
3856
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if $str is identical to one of the known Byte Order Marks,
     *              <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // The BOM strings are the keys of self::$BOM. They are compared strictly and
        // without iterating the static array by reference, so the shared state is never touched.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3881
3882
    /**
     * Determine whether the given value is considered to be empty.
     *
     * The value is empty if it is falsy in a boolean context, e.g. '', '0', 0, 0.0 or [].
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // for an existing variable "empty($x)" is the same as "!$x"
        return !$str;
    }
3899
3900
    /**
     * Check whether the string consists of hexadecimal chars only (an empty string matches as well).
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3919
3920
    /**
     * Check if the string contains at least one HTML tag.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded_str = self::emoji_encode($str);

        // an opening, closing or self-closing tag with optional attributes
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        $found_tags = [];
        \preg_match($tag_pattern, $encoded_str, $found_tags);

        return $found_tags !== [];
    }
3947
3948
    /**
     * Check if $url is a correct "http://" or "https://" url.
     *
     * @param string $url                <p>The url to check.</p>
     * @param bool   $disallow_localhost <p>Reject urls that point to localhost, 127.0.0.1, the IPv6 loopback
     *                                   or that contain ".localhost".</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // an IPv6 literal in a url is written in brackets, without these entries
                    // the loopback address would slip through to the FILTER_VALIDATE_URL fallback
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // fallback for everything the regex above does not accept
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
4003
4004
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if ext-json is flagged as missing
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a decoded "null" is only fine if the input really was the literal null
        $is_null_literal = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$is_null_literal) {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4044
4045
    /**
     * Check whether the string consists of lowercase chars only (an empty string matches as well).
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4062
4063
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is unserialized with "allowed_classes" disabled, so no object of a
     * user-defined class is instantiated while testing input that may be untrusted.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // a serialized "false" can not be told apart from a failed unserialize() call
        if ($str === 'b:0;') {
            return true;
        }

        // invalid input triggers a notice / warning, that is expected here and therefore silenced
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4085
4086
    /**
     * Check whether the string consists of upper case chars only (an empty string matches as well).
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4106
4107
    /**
     * Check if the string is UTF-16.
     *
     * The input (without BOM) is converted to UTF-8 as UTF-16LE and as UTF-16BE. For every
     * conversion that survives a round-trip, the chars of the result that are also found in the
     * char list of the input are counted. The byte order with the higher count wins, a tie means
     * "not UTF-16".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>Return false right away if the input is not
     *                                          binary according to UTF8::is_binary() in strict mode.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per byte order, little endian is tested first
        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach ($scores as $candidate_encoding => $unused_score) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate_encoding);
            if (!$test) {
                continue;
            }

            // the conversion must survive a round-trip, otherwise the candidate is ignored
            $test2 = \mb_convert_encoding($test, $candidate_encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate_encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the char list of the input is only built once and only if it is needed
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys (the chars) are needed, so there is no reason to iterate
            // the result of a function call by reference
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$candidate_encoding];
                }
            }
        }

        $maybe_utf16le = $scores['UTF-16LE'];
        $maybe_utf16be = $scores['UTF-16BE'];

        if ($maybe_utf16be === $maybe_utf16le) {
            return false;
        }

        return $maybe_utf16le > $maybe_utf16be ? 1 : 2;
    }
4200
4201
    /**
     * Check if the string is UTF-32.
     *
     * The input (without BOM) is converted to UTF-8 as UTF-32LE and as UTF-32BE. For every
     * conversion that survives a round-trip, the chars of the result that are also found in the
     * char list of the input are counted. The byte order with the higher count wins, a tie means
     * "not UTF-32".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>Return false right away if the input is not
     *                                          binary according to UTF8::is_binary() in strict mode.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per byte order, little endian is tested first
        $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach ($scores as $candidate_encoding => $unused_score) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate_encoding);
            if (!$test) {
                continue;
            }

            // the conversion must survive a round-trip, otherwise the candidate is ignored
            $test2 = \mb_convert_encoding($test, $candidate_encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate_encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the char list of the input is only built once and only if it is needed
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys (the chars) are needed, so there is no reason to iterate
            // the result of a function call by reference
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$candidate_encoding];
                }
            }
        }

        $maybe_utf32le = $scores['UTF-32LE'];
        $maybe_utf32be = $scores['UTF-32BE'];

        if ($maybe_utf32be === $maybe_utf32le) {
            return false;
        }

        return $maybe_utf32le > $maybe_utf32be ? 1 : 2;
    }
4294
4295
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * An array is valid if every element is valid (elements are checked recursively),
     * so an empty array is valid as well.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // the elements are only read, so they are iterated by value
            foreach ($str as $value) {
                if (!self::is_utf8($value, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4325
4326
    /**
     * Decodes a JSON string, the input is passed through UTF8::filter() first.
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded, it has to be UTF-8 encoded.
     *                        </p>
     *                        <p>PHP implements a superset of JSON - it will also encode and decode
     *                        scalar types and <b>NULL</b>. The JSON standard only supports these
     *                        values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, it is handed over to the native
     *                        json_decode() unchanged.
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if ext-json is flagged as missing
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // the filter runs before the extension check, same order as before
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filtered_json, $assoc, $depth, $options);
    }
4380
4381
    /**
     * Returns the JSON representation of a value, the value is passed through UTF8::filter() first.
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource. All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of JSON - it will also encode and decode
     *                       scalar types and <b>NULL</b>. The JSON standard only supports these
     *                       values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>), it is
     *                       handed over to the native json_encode() unchanged.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if ext-json is flagged as missing
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // the filter runs before the extension check, same order as before
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filtered_value, $options, $depth);
    }
4434
4435
    /**
     * Checks whether JSON is available on the server, by looking for the "json_decode" function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $decoder_is_available = \function_exists('json_decode');

        return $decoder_is_available;
    }
4449
4450
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $normalized_encoding);
            $head = (string) self::substr($str, 0, 1, $normalized_encoding);

            return self::strtolower(
                $head,
                $normalized_encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        // the native "mb_" function is only used if no special handling was requested
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($head) . $tail;
        }

        return self::strtolower(
            $head,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $tail;
    }
4512
4513
    /**
     * Alias for "UTF8::lcfirst()", all arguments are handed over unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
4544
4545
    /**
4546
     * Lowercase for all words in the string.
4547
     *
4548
     * @param string      $str                           <p>The input string.</p>
4549
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
4550
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
4551
     *                                                   not start a new word.</p>
4552
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
4553
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4554
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4555
     *                                                   tr</p>
4556
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4557
     *                                                   -> ß</p>
4558
     *
4559
     * @psalm-pure
4560
     *
4561
     * @return string
4562
     */
4563 2
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a loose falsy check would also
        // discard the perfectly valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: the words are only read, so a by-reference loop
        // would just leave a dangling reference behind.
        foreach ($words as $word) {
            // Skip empty fragments only; the word "0" must survive.
            if ($word === '') {
                continue;
            }

            // Words listed in $exceptions are appended untouched.
            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4598
4599
    /**
4600
     * alias for "UTF8::lcfirst()"
4601
     *
4602
     * @param string      $str
4603
     * @param string      $encoding
4604
     * @param bool        $clean_utf8
4605
     * @param string|null $lang
4606
     * @param bool        $try_to_keep_the_string_length
4607
     *
4608
     * @psalm-pure
4609
     *
4610
     * @return string
4611
     *
4612
     * @see        UTF8::lcfirst()
4613
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
4614
     */
4615 5
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: every argument is handed to lcfirst() unchanged.
        $lowercased = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $lowercased;
    }
4630
4631
    /**
4632
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
4633
     *
4634
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
4635
     *
4636
     * @param string      $str   <p>The string to be trimmed</p>
4637
     * @param string|null $chars <p>Optional characters to be stripped</p>
4638
     *
4639
     * @psalm-pure
4640
     *
4641
     * @return string the string with unwanted characters stripped from the left
4642
     */
4643 23
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list would produce the invalid character class "^[]+",
        // which makes the regex engine fail and the whole input vanish.
        // Native ltrim() strips nothing in that case, so do the same here.
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // mb_ereg_replace() takes a pattern without delimiters, the PCRE
            // fallback wraps the pattern in "/", so only the latter needs the
            // delimiter escaped.
            /** @noinspection PregQuoteUsageInspection */
            $chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2.
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4671
4672
    /**
4673
     * Returns the UTF-8 character with the maximum code point in the given data.
4674
     *
4675
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
4676
     *
4677
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
4678
     *
4679
     * @psalm-pure
4680
     *
4681
     * @return string|null the character with the highest code point, returns null on failure or empty input
4682
     */
4683
    public static function max($arg)
    {
        // An array of strings is flattened so that every character competes.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // No code points means there is no character to return.
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4698
4699
    /**
4700
     * Calculates and returns the maximum number of bytes taken by any
4701
     * UTF-8 encoded character in the given string.
4702
     *
4703
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
4704
     *
4705
     * @param string $str <p>The original Unicode string.</p>
4706
     *
4707
     * @psalm-pure
4708
     *
4709
     * @return int
4710
     *             <p>Max byte lengths of the given chars.</p>
4711
     */
4712
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        // An empty size list yields a width of 0.
        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4721
4722
    /**
4723
     * Checks whether mbstring is available on the server.
4724
     *
4725
     * @psalm-pure
4726
     *
4727
     * @return bool
4728
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4729
     *
4730
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4731
     */
4732
    public static function mbstring_loaded(): bool
    {
        // Ask the engine whether the "mbstring" extension is loaded.
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4736
4737
    /**
4738
     * Returns the UTF-8 character with the minimum code point in the given data.
4739
     *
4740
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
4741
     *
4742
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
4743
     *
4744
     * @psalm-pure
4745
     *
4746
     * @return string|null
4747
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
4748
     */
4749
    public static function min($arg)
    {
        // An array of strings is flattened so that every character competes.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // No code points means there is no character to return.
        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4764
4765
    /**
4766
     * alias for "UTF8::normalize_encoding()"
4767
     *
4768
     * @param mixed $encoding
4769
     * @param mixed $fallback
4770
     *
4771
     * @psalm-pure
4772
     *
4773
     * @return mixed
4774
     *
4775
     * @see        UTF8::normalize_encoding()
4776
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
4777
     */
4778
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        // Deprecated alias: the work is done by normalize_encoding().
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4782
4783
    /**
4784
     * Normalize the encoding-"name" input.
4785
     *
4786
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
4787
     *
4788
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4789
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4790
     *
4791
     * @psalm-pure
4792
     *
4793
     * @return mixed|string
4794
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
4795
     *
4796
     * @template TNormalizeEncodingFallback
4797
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
4798
     * @phpstan-return string|TNormalizeEncodingFallback
4799
     */
4800
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * Per-request memo of already resolved names.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // '' and '0' are "no encoding given"; '1' (like '0') is only a
        // fallback for non "strict_types" usage, e.g. a stringified bool.
        if (\in_array($encoding, ['', '0', '1'], true)) {
            return $fallback;
        }

        // Fast paths for the most frequent spellings (no cache / table lookup).
        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (\in_array($encoding, ['ISO', 'ISO-8859-1'], true)) {
            return 'ISO-8859-1';
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // The list of known encoding names is loaded lazily, once.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // Already a known name: remember and return it as-is.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encoding_original = $encoding;
        $encoding = \strtoupper($encoding);
        // Lookup key: upper-cased name with everything but letters and digits removed.
        $lookup_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown keys keep the upper-cased input as the result.
        $encoding = $equivalences[$lookup_key] ?? $encoding;

        // The memo is keyed by the caller's original spelling.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }
4948
4949
    /**
4950
     * Standardize line ending to unix-like.
4951
     *
4952
     * @param string          $str      <p>The input string.</p>
4953
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
4954
     *                                  here.</p>
4955
     *
4956
     * @psalm-pure
4957
     *
4958
     * @return string
4959
     *                <p>A string with normalized line ending.</p>
4960
     */
4961
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // An array replacer keeps the historical pairwise str_replace()
        // semantics ("\r\n" => [0], "\r" => [1], "\n" => [2]).
        if (\is_array($replacer)) {
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // Step 1: collapse every line-ending flavour to "\n".
        // str_replace() applies its search items one after another on the
        // already-replaced text, so replacing straight into e.g. "\r\n"
        // would re-match the "\r" and "\n" it has just inserted.
        $str = \str_replace(["\r\n", "\r"], "\n", $str);

        if ($replacer === "\n") {
            return $str;
        }

        // Step 2: a single pass over the unambiguous "\n" markers.
        return \str_replace("\n", $replacer, $str);
    }
4965
4966
    /**
4967
     * Normalize some MS Word special characters.
4968
     *
4969
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
4970
     *
4971
     * @param string $str <p>The string to be normalized.</p>
4972
     *
4973
     * @psalm-pure
4974
     *
4975
     * @return string
4976
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
4977
     */
4978
    public static function normalize_msword(string $str): string
    {
        // Thin wrapper: the work is delegated to ASCII::normalize_msword().
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4982
4983
    /**
4984
     * Normalize the whitespace.
4985
     *
4986
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
4987
     *
4988
     * @param string $str                          <p>The string to be normalized.</p>
4989
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
4990
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
4991
     *                                             bidirectional text chars.</p>
4992
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
4993
     *
4994
     * @psalm-pure
4995
     *
4996
     * @return string
4997
     *                <p>A string with normalized whitespace.</p>
4998
     */
4999
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        // Thin wrapper: all flags are forwarded to ASCII::normalize_whitespace() unchanged.
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls, $normalize_control_characters);

        return $normalized;
    }
5012
5013
    /**
5014
     * Calculates Unicode code point of the given UTF-8 encoded character.
5015
     *
5016
     * INFO: opposite to UTF8::chr()
5017
     *
5018
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
5019
     *
5020
     * @param string $chr      <p>The character of which to calculate code point.</p>
5021
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5022
     *
5023
     * @psalm-pure
5024
     *
5025
     * @return int
5026
     *             <p>Unicode code point of the given character,<br>
5027
     *             0 on invalid UTF-8 byte sequence</p>
5028
     */
5029
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * Memo of already computed code points, keyed by "<char>_<encoding>".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        // The two common names skip the normalisation call.
        $is_common_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_common_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The key is built from the character as passed in, before any re-encoding below.
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // The lookup table is loaded lazily, once.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            // A falsy result (null / 0) falls through to the manual decoder.
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // unpack() yields a 1-indexed list of byte values; at most 4 bytes are inspected.
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte, or a sequence shorter than its lead byte announces
            $code = $lead;
        }

        return $CHAR_CACHE[$cache_key] = (int) $code;
    }
5107
5108
    /**
5109
     * Parses the string into an array (into the second parameter).
5110
     *
5111
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
5112
     *          if the second parameter is not set!
5113
     *
5114
     * EXAMPLE: <code>
5115
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
5116
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
5117
     * </code>
5118
     *
5119
     * @see http://php.net/manual/en/function.parse-str.php
5120
     *
5121
     * @param string $str        <p>The input string.</p>
5122
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
5123
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5124
     *
5125
     * @psalm-pure
5126
     *
5127
     * @return bool
5128
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
5129
     */
5130
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            // Native parse_str() reports nothing, so only the outcome is checked.
            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        // Success requires both a non-false return value and a non-empty result.
        return $parsed !== false && $result !== [];
    }
5149
5150
    /**
5151
     * Checks if the /u modifier is available that enables Unicode support in PCRE.
5152
     *
5153
     * @psalm-pure
5154
     *
5155
     * @return bool
5156
     *              <p>
5157
     *              <strong>true</strong> if support is available,<br>
5158
     *              <strong>false</strong> otherwise
5159
     *              </p>
5160
     */
5161
    public static function pcre_utf8_support(): bool
    {
        // Probe with an empty pattern that carries the "u" modifier; warnings
        // from the probe are silenced and the result is reduced to a bool.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return (bool) $probe;
    }
5166
5167
    /**
5168
     * Create an array containing a range of UTF-8 characters.
5169
     *
5170
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
5171
     *
5172
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
5173
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
5174
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
5175
     *                              "is_numeric"</p>
5176
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
5177
     * @param float|int  $step      [optional] <p>
5178
     *                              If a step value is given, it will be used as the
5179
     *                              increment between elements in the sequence. step
5180
     *                              should be given as a positive number. If not specified,
5181
     *                              step will default to 1.
5182
     *                              </p>
5183
     *
5184
     * @psalm-pure
5185
     *
5186
     * @return string[]
5187
     */
5188
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        // Falsy bounds (0, '', '0', null, ...) produce an empty range.
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // ctype_*() must always see strings: an integer argument is read as
        // an ASCII code (and is deprecated since PHP 8.1), so 71 ("G") would
        // not count as a hex number although "71" does.
        $var1_str = (string) $var1;
        $var2_str = (string) $var2;

        /** @noinspection PhpComposerExtensionStubsInspection */
        $is_digit = $use_ctype && \ctype_digit($var1_str) && \ctype_digit($var2_str);
        /** @noinspection PhpComposerExtensionStubsInspection */
        $is_xdigit = !$is_digit && $use_ctype && \ctype_xdigit($var1_str) && \ctype_xdigit($var2_str);

        // Resolve the start code point: decimal, hexadecimal, plain numeric
        // (only without ctype), otherwise the code point of the character.
        if ($is_digit) {
            $start = (int) $var1;
        } elseif ($is_xdigit) {
            $start = (int) self::hex_to_int($var1_str);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1_str);
        }

        if (!$start) {
            return [];
        }

        // Resolve the end code point with the same rules.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_str);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_str);
        }

        if (!$end) {
            return [];
        }

        // Native range() also handles descending sequences ($start > $end).
        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5261
5262
    /**
5263
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
5264
     *
5265
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
5266
     *
5267
     * e.g:
5268
     * 'test+test'                     => 'test+test'
5269
     * 'D&#252;sseldorf'               => 'Düsseldorf'
5270
     * 'D%FCsseldorf'                  => 'Düsseldorf'
5271
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
5272
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
5273
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
5274
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
5275
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
5276
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
5277
     *
5278
     * @param string $str          <p>The input string.</p>
5279
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
5280
     *
5281
     * @psalm-pure
5282
     *
5283
     * @return string
5284
     *                <p>The decoded URL, as a string.</p>
5285
     */
5286
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass always runs; with $multi_decode the pass is
        // repeated until the string no longer changes.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = \rawurldecode($entities_decoded);
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
5322
5323
    /**
5324
     * Replaces all occurrences of $pattern in $str by $replacement.
5325
     *
5326 1
     * @param string $str         <p>The input string.</p>
5327 1
     * @param string $pattern     <p>The regular expression pattern.</p>
5328 1
     * @param string $replacement <p>The string to replace with.</p>
5329 1
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
5330 1
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
5331
     *
5332
     * @psalm-pure
5333
     *
5334
     * @return string
5335
     */
5336 7
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string "msr" is mapped onto "ms"; anything else is used as given.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: a falsy delimiter ('' or '0') is replaced by "/"
        $boundary = $delimiter ?: '/';

        // The "u" modifier is always added in front of the caller's modifiers.
        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5358
5359 18
    /**
5360 9
     * alias for "UTF8::remove_bom()"
5361
     *
5362
     * @param string $str
5363
     *
5364 18
     * @psalm-pure
5365
     *
5366
     * @return string
5367
     *
5368 18
     * @see        UTF8::remove_bom()
5369 18
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
5370 18
     */
5371 18
    public static function removeBOM(string $str): string
    {
        // Deprecated alias: the work is done by remove_bom().
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5375
5376
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM (BOM bytes => byte length) is compared with the
     * start of the string; each one that matches is cut off before the next
     * entry is checked.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // skip every BOM that is not a prefix of the current string
            if (\strncmp($str, $bom_string, $bom_byte_length) !== 0) {
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $bom_byte_length, $remaining_length);
            if ($stripped === false) {
                return '';
            }

            $remaining_length -= (int) $bom_byte_length;
            $str = (string) $stripped;
        }

        return $str;
    }
5411 55
5412 55
    /**
     * Collapses consecutive repetitions of a string inside another string into one occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String (or list of strings) to search for in the base string. Default: ' '</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $needles = \is_string($what) ? [$what] : $what;

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_array($needles)) {
            // neither a string nor an array was given: nothing to do
            return $str;
        }

        foreach ($needles as $needle) {
            // one or more back-to-back copies of the needle ...
            $pattern = '/(' . \preg_quote($needle, '/') . ')+/u';
            // ... are replaced by a single copy
            $str = (string) \preg_replace($pattern, $needle, $str);
        }

        return $str;
    }
5442
5443 2
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, passed on to
     *                               "strip_tags()". Default: ''</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5460
5461
    /**
     * Remove all breaks [<br> | \r\n | \r | \n] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
     * Only real "br" tags (any case, with or without attributes / self-closing slash)
     * are matched; other tags that merely start with "br" are left untouched.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" has to come first in the alternation, otherwise "\r" and "\n" are replaced one by one;
        // "\b" stops the tag name after "br", "[^>]*" consumes attributes and the optional "/"
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5476
5477
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * The work is delegated unchanged to "ASCII::remove_invisible_characters()".
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        $visible_only = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $visible_only;
    }
5514
5515
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix test is a byte-wise comparison; the cut itself is done per
     * character in the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare with '' explicitly: a plain truthiness check would ignore the prefix "0";
        // strncmp() only inspects the first bytes instead of searching the whole string
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5556 4
5557
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix test is a byte-wise comparison; the cut itself is done per
     * character in the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare with '' explicitly: a plain truthiness check would ignore the suffix "0"
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5595 4
5596
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // native "str_replace()" for exact matches, the class' own "str_ireplace()" otherwise
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5621
5622
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or the list of strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5647
5648
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if ($replacement_char === '') {
                $replacement_char_helper = 'none';
            } else {
                // "mb_substitute_character()" expects a Unicode code point, so decode the first
                // character; a byte-wise "ord()" would be wrong for multi-byte replacement chars
                $replacement_char_helper = (int) self::ord(
                    (string) \mb_substr($replacement_char, 0, 1, 'UTF-8'),
                    'UTF-8'
                );
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
            @\mb_substitute_character($replacement_char_helper);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            // restore the substitute character that was active before this call
            \mb_substitute_character($save);
        }

        // finally replace the real U+FFFD characters (bytes EF BF BD) that were already part of the input
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5707 35
5708 35
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. With null, whitespace is stripped;
     *                           with an empty string, nothing is stripped.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // nothing to strip: an empty list would build the invalid pattern "[]+$"
        // and the failed replacement would be cast to an empty string
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // the "/" delimiter only has to be escaped for the preg-based fallback
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = '[' . $quoted_chars . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5749 14
5750
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of self::$SUPPORT.
     * This method is not pure: it writes to the output when $useEcho is true.
     *
     * @param bool $useEcho [optional] <p>Echo the generated HTML in addition to returning it. Default: true</p>
     *
     * @return string
     *                <p>The generated HTML.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';
        // iterate by value: a by-reference loop would leave a dangling reference into self::$SUPPORT
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5777
5778 2
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // the ASCII check is only evaluated when the caller asked to keep ASCII chars
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5811
5812
    /**
     * Replaces every run of $tab_length spaces in the string with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // the sequence of spaces that stands for one tab
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5832
5833
    /**
     * Alias for "UTF8::str_split()".
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5855
5856
    /**
     * Alias for "UTF8::str_starts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5873
5874
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character, then drop leading dashes / underscores
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native "mb_" functions are only used when no language / length special-casing is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-cases one matched part, either natively or via "self::strtoupper()".
         *
         * @param string $part
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $part, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($part, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($part);
            }

            return \mb_strtoupper($part, $encoding);
        };

        // step 1: remove separators and upper-case the character that follows them (if any)
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // step 2: upper-case every run of digits together with the character that follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5967
5968
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first; afterwards the capitalize-helper runs
     * once with ' ' and once with '-' as the separator.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_separated, '-');
    }
5989
5990
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // no native "str_contains()" before PHP 8
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
6020
6021
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty haystack, an empty list of needles or an empty needle always
     * results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare with '' explicitly, so that the needle "0" is still searched for
            if ($needle === '') {
                return false;
            }

            // exactly one lookup per needle: the case-sensitive check must not
            // fall through into the case-insensitive one
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6060
6061 44
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty haystack or an empty list of needles
     * always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains at least one of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare with '' explicitly, so that the needle "0" is not skipped
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6105
6106 45
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "UTF8::str_delimit()" with '-' as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
6122
6123
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the "mb_ereg_" functions are used when native mbstring support was detected, "preg_" otherwise
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: put a dash in front of every uppercase letter that is not at a word boundary
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lower-case the string (before the delimiter is inserted, so the delimiter keeps its case)
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: replace every run of dashes, underscores and whitespace by the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6176
6177
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * The checks run in this order: binary (UTF-32 / UTF-16), ASCII, UTF-8,
     * "mb_detect_encoding()" (only if mbstring is supported) and finally an "iconv()" round-trip
     * for every known encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        // the parameter is untyped, so normalize it first
        $str = (string) $str;

        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        if (self::is_binary($str, true)) {
            $utf32_kind = self::is_utf32($str, false);
            if ($utf32_kind === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_kind === 2) {
                return 'UTF-32BE';
            }

            $utf16_kind = self::is_utf16($str, false);
            if ($utf16_kind === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_kind === 2) {
                return 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        // 2.) plain ASCII
        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        // 3.) valid UTF-8
        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
        if (self::$SUPPORT['mbstring'] === true) {
            $detection_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $detection_order, true);
            if ($detected) {
                return $detected;
            }
        }

        // 5.) "iconv()": an encoding is accepted if converting the string to itself leaves it unchanged
        if (self::$ENCODINGS === null) {
            // the encoding list is loaded lazily on first use
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
6307
6308
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::str_ends_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_ends_with()
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with = self::str_ends_with($haystack, $needle);

        return $ends_with;
    }
6325
6326
    /**
     * Check if the string ends with the given substring (case-sensitive, byte-wise comparison).
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Always true for an empty needle, otherwise false for an empty haystack.</p>
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        // nothing non-empty can be found in an empty haystack
        if ($haystack === '') {
            return false;
        }

        // before PHP 8: compare the tail of the haystack by hand
        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
6358
6359 9
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - the comparison is byte-wise ("substr()" / "strlen()")
     * - an empty substring only matches an empty $str
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a reference would only leave a dangling alias behind
        foreach ($substrings as $substring) {
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
6386
6387
    /**
     * Makes sure that the string starts with $substring: the string is returned as it is
     * if the prefix is already there, otherwise $substring is prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // nothing to prepend
        if ($substring === '') {
            return $str;
        }

        $already_prefixed = \strpos($str, $substring) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
6410
6411
    /**
     * Makes sure that the string ends with $substring: the string is returned as it is
     * if the suffix is already there, otherwise $substring is appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // nothing to append
        if ($substring === '') {
            return $str;
        }

        // byte-wise check of the current suffix
        if ($str !== '' && \substr($str, -\strlen($substring)) === $substring) {
            return $str;
        }

        return $str . $substring;
    }
6435
6436
    /**
     * Makes a string "human readable": every '_id' is removed, the remaining
     * underscores become spaces, the result is trimmed and passed to "UTF8::ucfirst()".
     *
     * INFO: '_id' is removed wherever it occurs, not only at the end of the string.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' must be removed before the remaining underscores are replaced
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($spaced));
    }
6462
6463
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::str_istarts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with = self::str_istarts_with($haystack, $needle);

        return $starts_with;
    }
6480
6481
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::str_iends_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with = self::str_iends_with($haystack, $needle);

        return $ends_with;
    }
6498
6499
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Always true for an empty needle, otherwise false for an empty haystack.</p>
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // Cut the tail by characters, not by bytes: the upper- and lowercase form of a character
        // may differ in byte length, and a byte-based cut could split a multibyte character.
        $needle_length = (int) \mb_strlen($needle);
        $tail = (string) \mb_substr($haystack, -$needle_length);

        return self::strcasecmp($tail, $needle) === 0;
    }
6526
6527
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - every substring is checked via "UTF8::str_iends_with()"
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a reference would only leave a dangling alias behind
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6554
6555
    /**
     * Deprecated wrapper: forwards all arguments unchanged to "UTF8::stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
6586
6587
    /**
     * Deprecated wrapper: forwards all arguments unchanged to "UTF8::strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
6619
6620
    /**
     * Deprecated wrapper: forwards all arguments unchanged to "UTF8::strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
6651
6652
    /**
     * Deprecated wrapper: forwards all arguments unchanged to "UTF8::strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strrpos($str, $needle, $offset, $encoding);

        return $index;
    }
6684
6685
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged if $index is greater than its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // any other encoding goes through the encoding-aware helpers of this class
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8: the "mb_*" functions are called directly
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
6726 4
6727 4
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * INFO: the replacement is always inserted literally, "$1" or "\1" are not handled as back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // build one case-insensitive unicode regex per needle
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // an empty needle must not match anything: this pattern can never match
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // "preg_replace()" interprets "$n" and "\n" in the replacement as back-references,
        // "str_ireplace()" does not -> escape "\" and "$" so the replacement stays literal
        $escape_replacement = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        if (\is_array($replacement)) {
            $replacement = \array_map($escape_replacement, $replacement);
        } else {
            $replacement = $escape_replacement($replacement);
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6794 29
6795 1
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * - the prefix is compared via "strncasecmp()" (byte-wise, case-insensitive)
     * - an empty $search appends $replacement to the end of the string
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // covers the empty $str cases as well: '' . $replacement === $replacement
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);
        $has_prefix = \strncasecmp($str, $search, $search_length) === 0;

        return $has_prefix ? $replacement . \substr($str, $search_length) : $str;
    }
6830 2
6831 2
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * - the suffix is compared byte-wise and case-insensitive
     * - an empty $search appends $replacement to the end of the string
     * - the string is returned unchanged if $search is longer than the string
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // covers the empty $str cases as well: '' . $replacement === $replacement
        if ($search === '') {
            return $str . $replacement;
        }

        // A search that is longer than the string can never match; checking this first avoids an
        // out-of-range offset, which would throw a "ValueError" on PHP 8.
        $search_length = \strlen($search);
        if ($search_length > \strlen($str)) {
            return $str;
        }

        // compare only the last $search_length bytes, case-insensitive
        if (\substr_compare($str, $search, -$search_length, $search_length, true) === 0) {
            return ((string) \substr($str, 0, -$search_length)) . $replacement;
        }

        return $str;
    }
6865
6866 2
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Always true for an empty needle, otherwise false for an empty haystack.</p>
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // every string starts with the empty string
        if ($needle === '') {
            return true;
        }

        // nothing non-empty can be found in an empty haystack
        if ($haystack === '') {
            return false;
        }

        // the needle is a prefix if its first case-insensitive occurrence is at index 0
        $first_index = self::stripos($haystack, $needle);

        return $first_index === 0;
    }
6893
6894
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - every substring is checked via "UTF8::str_istarts_with()"
     * - an empty $str never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        if ($substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a reference would only leave a dangling alias behind
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6925 5
6926
    /**
     * Gets the substring after the first occurrence of a separator (case-insensitive search).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str or $separator is empty or if the separator was not found.</p>
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used to cut the string below,
        // otherwise the offset would not fit for non UTF-8 encodings
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6965
6966
    /**
     * Gets the substring after the last occurrence of a separator (case-insensitive search).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str or $separator is empty or if the separator was not found.</p>
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used to cut the string below,
        // otherwise the offset would not fit for non UTF-8 encodings
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7005
7006
    /**
     * Gets the substring before the first occurrence of a separator (case-insensitive search).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str or $separator is empty or if the separator was not found.</p>
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used to cut the string below,
        // otherwise the offset would not fit for non UTF-8 encodings
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7037
7038 1
    /**
     * Gets the substring before the last occurrence of a separator (case-insensitive search).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str or $separator is empty or if the separator was not found.</p>
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // any other encoding goes through the encoding-aware helpers of this class
        if ($encoding !== 'UTF-8') {
            $last_index = self::strripos($str, $separator, 0, $encoding);

            return $last_index === false
                ? ''
                : (string) self::substr($str, 0, $last_index, $encoding);
        }

        // UTF-8: the "mb_*" functions are called directly
        $last_index = \mb_strripos($str, $separator);

        return $last_index === false
            ? ''
            : (string) \mb_substr($str, 0, $last_index);
    }
7074 1
7075 1
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * INFO: this is a wrapper for "UTF8::stristr()" that returns an empty string instead of false.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str or $needle is empty or if nothing was found.</p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        // "not found" is reported as an empty string
        return $found === false ? '' : $found;
    }
7113
7114 2
    /**
     * Returns the part of the string after (or, with "$before_needle", before)
     * the last occurrence of "$needle"; the lookup is delegated to "UTF8::strrichr()".
     *
     * An empty string is returned when either argument is empty or when
     * "UTF8::strrichr()" reports no match.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
7152
7153 1
    /**
     * Returns the trailing $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // A negative start offset counts from the end of the string.
        $start = -$n;

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, $start, null, $encoding);
        }

        return (string) \mb_substr($str, $start);
    }
7181
7182
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned unchanged. Longer
     * strings are cut so that the kept part plus $str_add_on is $length
     * characters long. If $str_add_on alone is as long as (or longer than)
     * $length, nothing of the input is kept and only $str_add_on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Clamp at 0: a negative length would make mb_substr() keep
            // "everything except the last N characters" and blow the limit.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the input string.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7221 2
7222 2
    /**
     * Limit the number of characters in a string without cutting in the middle of a word.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * The string is first cut to $length characters and then shortened to the
     * last space inside that part. If the cut part contains no space at all,
     * the first "$length - 1" characters are used instead.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Nothing to do if the string already fits.
        $total = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($total <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: drop that space and stop there.
        $boundary_char = $is_utf8
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);
        if ($boundary_char === ' ') {
            $head = $is_utf8
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        if ($is_utf8) {
            $truncated = \mb_substr($str, 0, $length);
        } else {
            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return '' . $str_add_on;
            }
        }

        // A limit of -1 drops the last (possibly incomplete) word.
        $words = \explode(' ', $truncated, -1);
        $joined = \implode(' ', $words);

        if ($joined === '') {
            $head = $is_utf8
                ? \mb_substr($truncated, 0, $length - 1)
                : self::substr($truncated, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        return $joined . $str_add_on;
    }
7290 1
7291
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * The strings are compared character by character from the start; the
     * comparison stops at the first difference or at the end of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            for ($pos = 0; $pos < $limit; ++$pos) {
                $current = \mb_substr($str1, $pos, 1);

                if ($current === false || $current !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $current;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $current = self::substr($str1, $pos, 1, $encoding);

            if ($current === false || $current !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $current;
        }

        return $prefix;
    }
7354 4
7355
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses the dynamic programming approach described at
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     * but keeps only the previous and the current table row in memory.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, taken from $str1.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty in terms of characters.
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Split $str2 once instead of re-extracting its characters for every character of $str1.
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match so far
        $end = 0; // 1-based position in $str1 where the best match ends

        // $previous_row[$j] = length of the common run ending at ($i - 1, $j)
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $is_utf8
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];
            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    // strictly greater: on ties the earliest match wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7445
7446
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * The strings are compared character by character from the end; the
     * comparison stops at the first difference or at the start of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Negative offsets count from the end of the string.
            for ($back = 1; $back <= $limit; ++$back) {
                $current = \mb_substr($str1, -$back, 1);

                if ($current === false || $current !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $current . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($back = 1; $back <= $limit; ++$back) {
            $current = self::substr($str1, -$back, 1, $encoding);

            if ($current === false || $current !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $current . $suffix;
        }

        return $suffix;
    }
7512 4
7513
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * (UTF-8) modifier. A "/" inside the pattern that is not already escaped
     * is escaped automatically, so it can no longer terminate the expression.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // Escape every "/" that is preceded by an even number of backslashes
        // (i.e. not escaped yet); "\/" written by the caller stays untouched.
        $escaped = \preg_replace('~(?<!\\\\)(?:\\\\\\\\)*\\K/~', '\\\\/', $pattern);
        if ($escaped === null) {
            $escaped = $pattern;
        }

        return (bool) \preg_match('/' . $escaped . '/u', $str);
    }
7528
7529
    /**
     * Checks whether a character exists at the given index. Negative offsets
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        // Positive offsets are zero-based, negative ones start at -1 for the last character.
        return $offset >= 0
            ? $length > $offset
            : $length >= \abs($offset);
    }
7554
7555
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * The bounds check is delegated to "UTF8::str_offset_exists()", so the
     * string length is measured in the same $encoding that is used to read
     * the character.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7587
7588
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string
     * is empty or when the input is already longer than $pad_length.
     * With STR_PAD_BOTH an odd amount of padding puts the extra character on the right.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        // At least 1, so the chunk calculation below can never divide by zero.
        $pad_string_length = \max(
            1,
            $is_utf8 ? (int) \mb_strlen($pad_string) : (int) self::strlen($pad_string, $encoding)
        );

        // Builds exactly $count characters of padding by repeating $pad_string
        // just often enough and cutting off the overhang.
        $make_padding = static function (int $count) use (
            $pad_string,
            $pad_string_length,
            $is_utf8,
            $encoding
        ): string {
            if ($count <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($count / $pad_string_length));

            if ($is_utf8) {
                return (string) \mb_substr($repeated, 0, $count);
            }

            return (string) self::substr($repeated, 0, $count, $encoding);
        };

        $diff = $pad_length - $str_length;

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                return $make_padding($diff) . $str;

            case \STR_PAD_BOTH:
                return $make_padding((int) \floor($diff / 2))
                    . $str
                    . $make_padding((int) \ceil($diff / 2));

            case \STR_PAD_RIGHT:
            default:
                return $str . $make_padding($diff);
        }
    }
7755 6
7756
    /**
     * Pads the string on both sides until it has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7784
7785
    /**
     * Pads the beginning of the string until it has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7813
7814
    /**
     * Pads the end of the string until it has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7842
7843
    /**
     * Repeat a string.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * The input is passed through "UTF8::filter()" first and the result is
     * then repeated with the native "str_repeat()".
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7872
7873
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     * All arguments are handed to the native function unchanged.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $result;
         */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
7932
7933
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. If $str does not start
     * with $search, $str is returned unchanged. An empty $search matches the
     * (empty) start of every string, so $replacement is put in front of $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // Empty prefix: the replacement goes to the front, exactly where the
        // general branch below would put it.
        if ($search === '') {
            return $replacement . $str;
        }

        // A non-empty prefix can never be found in an empty string.
        if ($str === '') {
            return '';
        }

        $searchLength = \strlen($search);
        if (\strncmp($str, $search, $searchLength) === 0) {
            return $replacement . \substr($str, $searchLength);
        }

        return $str;
    }
7971 2
7972 2
    /**
     * Replaces $search at the very end of the string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. An empty $search is not
     * treated as a suffix: in that case $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The suffix to look for.</p>
     * @param string $replacement <p>The text that takes the place of the suffix.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with its suffix replaced, or the unchanged
     *                string if it does not end with $search.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // Covers the empty-$str cases as well: '' . $replacement === $replacement.
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // A suffix longer than the string can never match. Bail out before
        // using a negative offset that would lie outside of $str (a warning
        // on PHP 7, a ValueError on PHP 8).
        if ($search_length > \strlen($str)) {
            return $str;
        }

        // Compare only the last $search_length bytes of $str with $search.
        if (\substr_compare($str, $search, -$search_length) === 0) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
8009
8010 2
    /**
     * Replaces only the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The position is located with self::strpos() and the replaced span is
     * self::strlen($search) long. If nothing is found the subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the first match.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_position = self::strpos($subject, $search);

        if ($first_position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_position, (int) self::strlen($search));
    }
8044 2
8045
    /**
     * Replaces only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The position is located with self::strrpos() and the replaced span is
     * self::strlen($search) long. If nothing is found the subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the last match.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_position = self::strrpos($subject, $search);

        if ($last_position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_position, (int) self::strlen($search));
    }
8078
8079 2
    /**
     * Returns the characters of the string in random order.
     *
     * INFO: relies on \shuffle(), which is not suitable for cryptographic purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // The literal 'UTF-8' takes the direct mb_* route; anything else is
        // normalized first and handled by the class' own wrappers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // shuffle the character positions, then rebuild the string from them
        $positions = \range(0, $char_count - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $shuffled_str = '';
        foreach ($positions as $position) {
            $char = $is_utf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }
8128
8129
    /**
     * Extracts the part of the string from index $start up to, but not
     * including, index $end.
     *
     * Without $end the rest of the string is returned. A negative $end is
     * counted from the end of the string. A non-negative $end that is not
     * greater than $start yields an empty string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring, as returned by \mb_substr() for 'UTF-8'
     *                      or by self::substr() for every other encoding.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The literal 'UTF-8' takes the direct mb_* route; anything else is
        // normalized first and handled by the class' own wrappers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end !== null && $end >= 0) {
            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        } else {
            // open-ended or counted from the end: the total length is needed
            $total = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            $length = $end === null ? $total : $total + $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
8180
8181
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and runs of whitespace become "_", every uppercase letter is
     * lower-cased and prefixed with "_", ASCII digits are wrapped in "_", and
     * repeated, leading or trailing "_" are removed at the end.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Numbers and uppercase letters only. A "|" inside a character
            // class is a literal pipe, not an alternation, so it must not be
            // listed here (it used to turn every "|" into "_|").
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // a plain ASCII digit survives the int round-trip unchanged
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // The patterns are applied one after another on the previous result.
        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert whitespace runs to "_"
                '/^\\s+|\\s+$/u', // safeguard only: no whitespace is left after the first pattern
                '/_+/',                 // collapse repeated "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8250 22
8251
    /**
     * Orders the characters of a string by their code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicate values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8281
8282
    /**
     * Splits every entry of an array into chunks of Unicode characters.
     *
     * Each value is passed to self::str_split() with the given options; the
     * keys of the input array are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        $chunked = [];

        foreach ($input as $key => $value) {
            $chunked[$key] = self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        /** @var string[][] $chunked */
        return $chunked;
    }
8320
8321
    /**
     * Splits a string into an array of Unicode characters, or into chunks of
     * $length characters each.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback: arrays are handed to the array variant
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        $input = (string) $input;
        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        $use_mbstring = $try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true;

        // fast path: the native function already honours $length
        if ($use_mbstring && \function_exists('mb_str_split')) {
            /**
             * @psalm-suppress ImpureFunctionCall - why?
             */
            $native_chunks = \mb_str_split($input, $length);
            if ($native_chunks !== false) {
                return $native_chunks;
            }
        }

        // From here on $chars collects single characters; grouping by $length
        // happens at the very end.
        if ($use_mbstring) {
            $char_count = \mb_strlen($input);
            if ($char_count <= 127) {
                $chars = [];
                for ($index = 0; $index < $char_count; ++$index) {
                    $chars[] = \mb_substr($input, $index, 1);
                }
            } else {
                $matches = [];
                \preg_match_all('/./us', $input, $matches);
                $chars = $matches[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $input, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand; a byte that
            // does not start a complete, well-formed sequence is dropped

            $chars = [];
            $byte_count = \strlen($input);

            /** @noinspection ForeachInvariantsInspection */
            for ($pos = 0; $pos < $byte_count; ++$pos) {
                $lead = $input[$pos];

                // 0xxxxxxx: single byte
                if (($lead & "\x80") === "\x00") {
                    $chars[] = $lead;

                    continue;
                }

                // 110xxxxx: lead byte of a two byte sequence
                if (isset($input[$pos + 1]) && ($lead & "\xE0") === "\xC0") {
                    if (($input[$pos + 1] & "\xC0") === "\x80") {
                        $chars[] = $lead . $input[$pos + 1];

                        ++$pos;
                    }

                    continue;
                }

                // 1110xxxx: lead byte of a three byte sequence
                if (isset($input[$pos + 2]) && ($lead & "\xF0") === "\xE0") {
                    if (
                        ($input[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $input[$pos + 1] . $input[$pos + 2];

                        $pos += 2;
                    }

                    continue;
                }

                // 11110xxx: lead byte of a four byte sequence
                if (isset($input[$pos + 3]) && ($lead & "\xF8") === "\xF0") {
                    if (
                        ($input[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $input[$pos + 1] . $input[$pos + 2] . $input[$pos + 3];

                        $pos += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue every group of $length characters back together
            return \array_map(
                static function (array $group): string {
                    return \implode('', $group);
                },
                \array_chunk($chars, $length)
            );
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
8481 2
8482
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * NOTE(review): with mbstring the pattern is handed to \mb_split() as a
     * regular expression, while the fallback escapes it with \preg_quote() and
     * therefore splits on the literal text - confirm which one is intended.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings; empty if $limit is 0 or the split failed.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // \mb_split() reports failure with false, which must not leak out
            // of a method declared to return an array; the preg_split()
            // fallback below answers a failure the same way
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit parts
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // \preg_split() would put the unsplit rest into the last element, so
        // ask for one part more and drop it again afterwards
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
8546
8547
    /**
     * Tells whether the haystack begins with the needle (case-sensitive).
     *
     * An empty needle always matches; otherwise an empty haystack never does.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // the native function only exists since PHP 8
        if (\PHP_VERSION_ID < 80000) {
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
8579
8580 19
    /**
     * Tells whether the string begins with at least one of the given substrings.
     *
     * - case-sensitive
     * - an empty string or an empty list never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
8611 8
8612
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if the input or the separator is empty, or if
     * the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class' own wrapper here, like the sibling "separator"
        // methods do, instead of passing the caller's encoding name straight
        // to \mb_substr().
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8653 1
8654
    /**
     * Returns everything that follows the last occurrence of a separator.
     *
     * Yields an empty string if the input or the separator is empty, or if
     * the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $separator_pos = \mb_strrpos($str, $separator);

            return $separator_pos === false
                ? ''
                : (string) \mb_substr($str, $separator_pos + (int) \mb_strlen($separator));
        }

        $separator_pos = self::strrpos($str, $separator, 0, $encoding);
        if ($separator_pos === false) {
            return '';
        }

        // the tail starts right behind the separator
        $tail_start = $separator_pos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $tail_start, null, $encoding);
    }
8698 1
8699
    /**
     * Returns everything that precedes the first occurrence of a separator.
     *
     * Yields an empty string if the input or the separator is empty, or if
     * the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $separator_pos = \mb_strpos($str, $separator);

            return $separator_pos === false
                ? ''
                : (string) \mb_substr($str, 0, $separator_pos);
        }

        $separator_pos = self::strpos($str, $separator, 0, $encoding);
        if ($separator_pos === false) {
            return '';
        }

        return (string) self::substr($str, 0, $separator_pos, $encoding);
    }
8744 1
8745
    /**
     * Returns everything that precedes the last occurrence of a separator.
     *
     * Yields an empty string if the input or the separator is empty, or if
     * the separator does not occur in the input.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $separator_pos = \mb_strrpos($str, $separator);

            return $separator_pos === false
                ? ''
                : (string) \mb_substr($str, 0, $separator_pos);
        }

        $separator_pos = self::strrpos($str, $separator, 0, $encoding);
        if ($separator_pos === false) {
            return '';
        }

        // the encoding name is normalized only for the final extraction
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $separator_pos, $normalized_encoding);
    }
8789
8790
    /**
     * Returns the part of the string starting at the first occurrence of
     * "$needle", or the part in front of it when "$before_needle" is set.
     *
     * Yields an empty string if the input or the needle is empty, or if the
     * needle does not occur in the input.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // false is also the default of \mb_strstr()'s third parameter, so the
        // flag can be forwarded as it is
        $part = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
8836 1
8837 1
    /**
     * Returns the part of "$str" that starts at the last occurrence of "$needle"
     * or, if "$before_needle" is true, the part in front of that occurrence.
     *
     * An empty string is returned if one of the strings is empty or if the
     * needle was not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strrchr($str, $needle, $before_needle, $encoding);
        } else {
            // "false" is the default of the third argument anyway, so one call serves both directions
            $found = \mb_strrchr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
8883 1
8884 1
    /**
     * Wraps "$str" into "$substring": the substring is put in front of and behind the string.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
8899
8900
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of chars that contains neither whitespace nor one of
     * the chars from "$word_define_chars". Words that are listed in "$ignore" are
     * returned untouched, for all other words the first char is upper-cased and
     * the rest is lower-cased.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           additional word separators (next to whitespace),
     *                                                           e.g. "-" or "0".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the native "mbstring" functions are only sufficient if no language specific casing is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // compare explicitly: a plain truthiness check would silently drop the separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            // the chars end up inside of a regex character class
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are kept exactly as they are
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // language aware path: lower-case the whole word, then upper-case its first char
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8997
8998
    /**
     * Convert a string into an obfuscated string.
     *
     * Randomly picked chars of the input are replaced by "$obfuscateChar" until
     * roughly "$percent" of all chars were picked. Chars from "$keepChars" can be
     * picked (and count towards the limit) but they are never replaced.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the chars that is picked, e.g. 0.5 for 50%. Values above 1 are
     *                                handled like 1.</p>
     * @param string   $obfuscateChar
     * @param string[] $keepChars
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscate string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // hide already existing obfuscate chars behind a helper char, they are restored at the end
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // only chars that differ from the obfuscate char can be picked by the loop below
        $charsSelectable = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsSelectable;
            }
        }

        // never request more chars than can be picked at all, otherwise the loop
        // below would never terminate (e.g. for "$percent" > 1)
        $charsMaxChange = \min(\round($charsMax * $percent), $charsSelectable);
        $charsCounter = 0;
        $charKeyDone = [];

        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // pick roughly every second char per pass
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // protected chars use up the budget but stay readable
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
9064 1
9065
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. These words are added to the built-in list of "small words"
     * (a, an, and, ...) and are used as part of the regular expressions below.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // words that stay lower-case inside of a title, extended by the words of the caller
        $small_words_rx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case char (e.g. "ALL CAPS") is lower-cased as a whole, first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        /**
         * Decides per matched word how it is written.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $main_callback = static function (array $matches) use ($encoding): string {
            if ($matches[2]) {
                // preserve URLs, domains, emails and file paths
                $word = $matches[2];
            } elseif ($matches[3]) {
                // lower-case small words
                $word = self::strtolower($matches[3], $encoding);
            } elseif ($matches[4]) {
                // capitalize word w/o internal caps
                $word = static::ucfirst($matches[4], $encoding);
            } else {
                // preserve other kinds of word (iPhone)
                $word = $matches[5];
            }

            // preserve leading and trailing underscores
            return $matches[1] . $word . $matches[6];
        };

        /**
         * Capitalizes the first group.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        /**
         * Keeps the first group and capitalizes the second group.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        // the substitutions are applied one after another, the order matters
        $passes = [
            // the main substitutions
            [
                /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
                '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
                $main_callback,
            ],
            // Exceptions for small words: capitalize at start of title...
            [
                '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
                $ucfirst_group_two,
            ],
            // ...and end of title
            [
                '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
                $ucfirst_group_one,
            ],
            // Exceptions for small words in hyphenated compound words.
            // e.g. "in-flight" -> In-Flight
            [
                '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
                $ucfirst_group_one,
            ],
            // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
            [
                '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
                $ucfirst_group_two,
            ],
        ];

        foreach ($passes as list($pattern, $callback)) {
            $str = (string) \preg_replace_callback($pattern, $callback, $str);
        }

        return $str;
    }
9256
9257
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big number that is written with
     * the digits "0" and "1", without leading zeros ("0" for an empty input).
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Translate every hex digit into its group of 4 bits. base_convert() must not be used for
        // the whole value: it calculates with a float and loses precision for longer inputs.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            (string) $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // keep the established output format: no leading zeros, but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9280
9281
    /**
     * Splits a string into an array of lines.
     *
     * The string is split at every run of one or two line-break chars ("\r", "\n").
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split" and for a failed split
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // no filter requested -> hand the lines back untouched
        if (!$remove_empty_values && $remove_short_values === null) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
9321
9322
    /**
     * Convert a string into an array of words.
     *
     * The string is split at its words and the words themselves are kept in the
     * result, so the array contains the words as well as the text between them.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "nothing to split" and for a failed split
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        $char_list = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $nothing;
        }

        // no filter requested -> hand the parts back untouched
        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that only strings are returned
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
9373
9374 13
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9395
9396
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is already as long as (or longer than) the desired
     * length, nothing of the input is kept and only the substring is returned.
     * A string that is not longer than $length is returned unchanged.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // reserve the space for the substring
                $length -= (int) \mb_strlen($substring);
            }

            // a negative length would make mb_substr() cut chars off the end of
            // the string, which returns far more than the requested length
            if ($length < 0) {
                $length = 0;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // reserve the space for the substring
            $length -= (int) self::strlen($substring, $encoding);
        }

        // see above: never pass a negative length to the substr call
        if ($length < 0) {
            $length = 0;
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9456 2
9457
    /**
     * Truncates the string to a given length without cutting a word in two.
     * If $substring is provided, and truncating occurs, the string is further
     * truncated so that the substring may be appended without exceeding the
     * desired length.
     *
     * Only the substring is returned for an empty input, for a non-positive
     * length and if the substring leaves no room for the input.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>true === a leading word that is longer
     *                                                       than the available length is cut instead of dropped.
     *                                                       Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // reserve the space for the substring
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return '';
            }

            // the cut is only clean if the char right behind the head is a space
            $space_position = self::strpos($str, ' ', $length - 1, $encoding);
            if ($space_position !== $length) {
                // fall back to the last space inside of the head
                $last_position = self::strrpos($head, ' ', 0, $encoding);
                $drop_single_word = $space_position !== false && !$ignore_do_not_split_words_for_one_word;

                if ($last_position !== false || $drop_single_word) {
                    $head = (string) self::substr($head, 0, (int) $last_position, $encoding);
                }
            }

            return $head . $substring;
        }

        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve the space for the substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $head - needed for PhpStan (stubs error) */
        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // the cut is only clean if the char right behind the head is a space
        $space_position = \mb_strpos($str, ' ', $length - 1);
        if ($space_position !== $length) {
            // fall back to the last space inside of the head
            $last_position = \mb_strrpos($head, ' ', 0);
            $drop_single_word = $space_position !== false && !$ignore_do_not_split_words_for_one_word;

            if ($last_position !== false || $drop_single_word) {
                $head = (string) \mb_substr($head, 0, (int) $last_position);
            }
        }

        return $head . $substring;
    }
9563
9564 12
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
9581
9582
    /**
     * Returns an UpperCamelCase version of the supplied string: the string is
     * camelized via "UTF8::str_camelize()" and the first char of that result is
     * upper-cased via "UTF8::ucfirst()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        // the optional flags are only used for the upper-casing of the first char
        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9609
9610
    /**
9611
     * alias for "UTF8::ucfirst()"
9612
     *
9613
     * @param string      $str
9614
     * @param string      $encoding
9615
     * @param bool        $clean_utf8
9616
     * @param string|null $lang
9617
     * @param bool        $try_to_keep_the_string_length
9618
     *
9619
     * @psalm-pure
9620
     *
9621
     * @return string
9622
     *
9623 13
     * @see        UTF8::ucfirst()
9624
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
9625
     */
9626
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // deprecated alias: every argument is handed to ucfirst() unchanged
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
9641
9642
    /**
9643
     * Get the number of words in a specific string.
9644
     *
9645
     * EXAMPLES: <code>
9646
     * // format: 0 -> return only word count (int)
9647
     * //
9648
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
9649 5
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
9650 5
     *
9651 5
     * // format: 1 -> return words (array)
9652 5
     * //
9653 5
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
9654 5
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
9655
     *
9656
     * // format: 2 -> return words with offset (array)
9657
     * //
9658
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
9659
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
9660
     * </code>
9661
     *
9662
     * @param string $str       <p>The input string.</p>
9663
     * @param int    $format    [optional] <p>
9664
     *                          <strong>0</strong> => return a number of words (default)<br>
9665
     *                          <strong>1</strong> => return an array of words<br>
9666
     *                          <strong>2</strong> => return an array of words with word-offset as key
9667
     *                          </p>
9668
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
9669
     *
9670
     * @psalm-pure
9671
     *
9672
     * @return int|string[]
9673
     *                      <p>The number of words in the string.</p>
9674
     */
9675
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the loops below read every odd index of this list as a word and the
        // even indexes as the text surrounding the words
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            for ($index = 1; $index < $parts_count; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format 2: words keyed by their character offset in the input
        if ($format === 2) {
            $words_by_offset = [];
            $position = (int) self::strlen($parts[0]);
            for ($index = 1; $index < $parts_count; $index += 2) {
                $words_by_offset[$position] = $parts[$index];
                // advance past the word itself and the text that follows it
                $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
            }

            return $words_by_offset;
        }

        // any other format: only the number of words
        return (int) (($parts_count - 1) / 2);
    }
9699 2
9700 2
    /**
9701
     * Case-insensitive string comparison.
9702 2
     *
9703 2
     * INFO: Case-insensitive version of UTF8::strcmp()
9704 2
     *
9705 2
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
9706 2
     *
9707 2
     * @param string $str1     <p>The first string.</p>
9708
     * @param string $str2     <p>The second string.</p>
9709
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9710 2
     *
9711
     * @psalm-pure
9712
     *
9713 2
     * @return int
9714
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9715
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
9716
     *             <strong>0</strong> if they are equal
9717
     */
9718
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both operands with identical options, then compare case-sensitively
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
9742 23
9743 23
    /**
9744 23
     * alias for "UTF8::strstr()"
9745 23
     *
9746 23
     * @param string $haystack
9747
     * @param string $needle
9748 23
     * @param bool   $before_needle
9749 23
     * @param string $encoding
9750 23
     * @param bool   $clean_utf8
9751 23
     *
9752 23
     * @psalm-pure
9753 23
     *
9754 23
     * @return false|string
9755
     *
9756
     * @see        UTF8::strstr()
9757
     * @deprecated <p>please use "UTF8::strstr()"</p>
9758
     */
9759
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // deprecated alias: every argument is handed to strstr() unchanged
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9774
9775
    /**
9776
     * Case-sensitive string comparison.
9777
     *
9778
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
9779
     *
9780
     * @param string $str1 <p>The first string.</p>
9781
     * @param string $str2 <p>The second string.</p>
9782 2
     *
9783 2
     * @psalm-pure
9784 2
     *
9785 2
     * @return int
9786 2
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9787 2
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9788
     *             <strong>0</strong> if they are equal
9789
     */
9790
    public static function strcmp(string $str1, string $str2): int
    {
        // identical byte sequences are equal, no normalization needed
        if ($str1 === $str2) {
            return 0;
        }

        // compare the canonically decomposed (NFD) forms, so that composed and
        // decomposed spellings of the same character are treated as equal
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false for input it cannot normalize
        // (e.g. invalid UTF-8); fall back to the raw string so that such input
        // neither raises a TypeError nor collapses to an empty string
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
9801
9802
    /**
9803
     * Find length of initial segment not matching mask.
9804
     *
9805
     * @param string   $str
9806
     * @param string   $char_list
9807
     * @param int      $offset
9808 29
     * @param int|null $length
9809 21
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9810
     *
9811
     * @psalm-pure
9812 24
     *
9813 24
     * @return int
9814 24
     */
9815
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask nothing can match, so the whole string is the segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // narrow the subject down to the requested window first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $segment = self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $segment = \mb_substr($str, $offset, $length);
            } else {
                $segment = \mb_substr($str, $offset);
            }

            if ($segment === false) {
                return 0;
            }

            $str = $segment;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no char of the mask occurs: the segment is the whole (windowed) string
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9865 11
9866 2
    /**
9867
     * alias for "UTF8::stristr()"
9868
     *
9869 10
     * @param string $haystack
9870 10
     * @param string $needle
9871 9
     * @param bool   $before_needle
9872 9
     * @param string $encoding
9873
     * @param bool   $clean_utf8
9874
     *
9875
     * @psalm-pure
9876 9
     *
9877
     * @return false|string
9878
     *
9879 2
     * @see        UTF8::stristr()
9880
     * @deprecated <p>please use "UTF8::stristr()"</p>
9881
     */
9882
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // deprecated alias: every argument is handed to stristr() unchanged
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9897
9898
    /**
9899
     * Create a UTF-8 string from code points.
9900
     *
9901
     * INFO: opposite to UTF8::codepoints()
9902
     *
9903
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
9904
     *
9905 1
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
9906 1
     *
9907 1
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
9908 1
     *
9909 1
     * @psalm-pure
9910 1
     *
9911
     * @return string
9912
     *                <p>A UTF-8 encoded string.</p>
9913
     */
9914
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single value is handled like a one-element list
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // build one numeric HTML entity per value (each value is cast to int) ...
        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        // ... and let the entity decoder turn them into characters
        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
9931
9932 4
    /**
9933 4
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
9934
     *
9935
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
9936 4
     *
9937 1
     * @param string $str <p>The input string.</p>
9938
     *
9939
     * @psalm-pure
9940 4
     *
9941 4
     * @return bool
9942 4
     *              <p>
9943
     *              <strong>true</strong> if the string has BOM at the start,<br>
9944
     *              <strong>false</strong> otherwise
9945 4
     *              </p>
9946
     */
9947
    public static function string_has_bom(string $str): bool
    {
        // self::$BOM is read as "BOM byte sequence => its length in bytes".
        // Iterate by value: nothing is written here, and a by-reference loop
        // would turn the elements of the shared static array into references.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // binary-safe comparison of the first $bom_byte_length bytes
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9958
9959
    /**
9960
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
9961
     *
9962
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
9963
     *
9964
     * @see http://php.net/manual/en/function.strip-tags.php
9965
     *
9966 6
     * @param string      $str            <p>
9967 6
     *                                    The input string.
9968 6
     *                                    </p>
9969
     * @param string|null $allowable_tags [optional] <p>
9970
     *                                    You can use the optional second parameter to specify tags which should
9971
     *                                    not be stripped.
9972 6
     *                                    </p>
9973
     *                                    <p>
9974
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
9975
     *                                    can not be changed with allowable_tags.
9976
     *                                    </p>
9977
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9978
     *
9979
     * @psalm-pure
9980
     *
9981
     * @return string
9982
     *                <p>The stripped string.</p>
9983
     */
9984
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the native function is only given a second argument when a tag
        // list was actually supplied
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
10003
10004
    /**
10005 4
     * Strip all whitespace characters. This includes tabs and newline
10006 1
     * characters, as well as multibyte whitespace such as the thin space
10007
     * and ideographic space.
10008
     *
10009 4
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
10010 2
     *
10011
     * @param string $str
10012
     *
10013 4
     * @psalm-pure
10014 4
     *
10015
     * @return string
10016
     */
10017 2
    public static function strip_whitespace(string $str): string
    {
        // nothing to strip from an empty string; otherwise delete every run of
        // whitespace (the cast turns a failed replacement, i.e. null, into '')
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
10025
10026
    /**
10027
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10028
     *
10029
     * INFO: use UTF8::stripos_in_byte() for the byte-length
10030
     *
10031
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
10032
     *
10033
     * @see http://php.net/manual/en/function.mb-stripos.php
10034
     *
10035 36
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10036 3
     * @param string $needle     <p>The string to find in haystack.</p>
10037
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
10038
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10039 33
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10040
     *
10041
     * @psalm-pure
10042
     *
10043
     * @return false|int
10044
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
10045
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
10046
     */
10047
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty needle is only "found" from PHP 8 on
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $can_use_grapheme = self::$SUPPORT['intl'] === true
                            && $encoding === 'UTF-8'
                            && $offset >= 0;
        if ($can_use_grapheme) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
10115
10116
    /**
10117
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
10118 2
     *
10119 2
     * EXAMPLE: <code>
10120
     * $str = 'iñtërnâtiônàlizætiøn';
10121
     * $search = 'NÂT';
10122
     *
10123
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
10124
     * UTF8::stristr($str, $search, true); // 'iñtër'
10125
     * </code>
10126 2
     *
10127 2
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
10128
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
10129 2
     * @param bool   $before_needle [optional] <p>
10130
     *                              If <b>TRUE</b>, it returns the part of the
10131
     *                              haystack before the first occurrence of the needle (excluding the needle).
10132
     *                              </p>
10133
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10134
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10135
     *
10136
     * @psalm-pure
10137
     *
10138
     * @return false|string
10139
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
10140
     */
10141
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // PHP >= 8 finds an empty needle in an empty haystack
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return '';
            }

            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            if (\PHP_VERSION_ID >= 80000) {
                // PHP >= 8 finds an empty needle at position 0: the part before
                // the needle is empty, the part from the needle on is everything
                return $before_needle ? '' : $haystack;
            }

            return false;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the
        // first case-insensitive occurrence of the (quoted) needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip as many characters as were captured in front of the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
10221
10222
    /**
10223
     * Get the string length, not the byte-length!
10224
     *
10225
     * INFO: use UTF8::strwidth() for the char-length
10226
     *
10227
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
10228
     *
10229
     * @see http://php.net/manual/en/function.mb-strlen.php
10230
     *
10231
     * @param string $str        <p>The string being checked for length.</p>
10232
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10233
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10234
     *
10235
     * @psalm-pure
10236
     *
10237
     * @return false|int
10238
     *                   <p>
10239
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
10240
     *                   $encoding.
10241
     *                   (One multi-byte character counted as +1).
10242
     *                   <br>
10243
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
10244
     *                   chars.
10245
     *                   </p>
10246
     */
10247
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only: one byte per character
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $has_mbstring_disabled = self::$SUPPORT['mbstring'] === false;
        $has_iconv_disabled = self::$SUPPORT['iconv'] === false;
        if ($encoding !== 'UTF-8' && $has_mbstring_disabled && $has_iconv_disabled) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any single character"
        //

        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // $str is not empty here, so zero matches means the regex could not process it
        return $char_count === 0 ? false : $char_count;
    }
10352 4
10353
    /**
10354
     * Get string length in byte.
10355
     *
10356
     * @param string $str
10357
     *
10358
     * @psalm-pure
10359 8
     *
10360
     * @return int
10361 8
     */
10362 8
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an
        // 8-bit encoding; otherwise the native strlen() is called
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
10375
10376
    /**
10377
     * Case-insensitive string comparisons using a "natural order" algorithm.
10378
     *
10379
     * INFO: natural order version of UTF8::strcasecmp()
10380 1
     *
10381
     * EXAMPLES: <code>
10382
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
10383
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10384 1
     *
10385
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10386
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10387
     * </code>
10388
     *
10389 1
     * @param string $str1     <p>The first string.</p>
10390
     * @param string $str2     <p>The second string.</p>
10391
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10392
     *
10393
     * @psalm-pure
10394
     *
10395
     * @return int
10396
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
10397
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
10398
     *             <strong>0</strong> if they are equal
10399
     */
10400
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // case-fold both operands with identical options, then compare in natural order
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
10407
10408
    /**
10409
     * String comparisons using a "natural order" algorithm
10410
     *
10411
     * INFO: natural order version of UTF8::strcmp()
10412
     *
10413
     * EXAMPLES: <code>
10414
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
10415
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10416
     *
10417
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10418 2
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10419 2
     * </code>
10420 2
     *
10421
     * @see http://php.net/manual/en/function.strnatcmp.php
10422
     *
10423
     * @param string $str1 <p>The first string.</p>
10424
     * @param string $str2 <p>The second string.</p>
10425
     *
10426
     * @psalm-pure
10427
     *
10428
     * @return int
10429
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
10430
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
10431
     *             <strong>0</strong> if they are equal
10432
     */
10433
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical byte sequences are equal, no folding needed
        if ($str1 === $str2) {
            return 0;
        }

        // fold both operands the same way before the native natural-order comparison
        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
10444
10445
    /**
10446
     * Case-insensitive string comparison of the first n characters.
10447
     *
10448
     * EXAMPLE: <code>
10449
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
10450
     * </code>
10451 4
     *
10452 4
     * @see http://php.net/manual/en/function.strncasecmp.php
10453
     *
10454
     * @param string $str1     <p>The first string.</p>
10455 4
     * @param string $str2     <p>The second string.</p>
10456 4
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
10457 4
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10458
     *
10459
     * @psalm-pure
10460
     *
10461
     * @return int
10462
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10463
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10464
     *             <strong>0</strong> if they are equal
10465
     */
10466
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both operands with identical options, then compare the first $len characters
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
10478
10479
    /**
     * String comparison of the first n characters.
     *
     * Both inputs are cut down to their first $len characters and the
     * remainders are compared with UTF8::strcmp().
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // "UTF-8" and "CP850" are used as-is; every other name is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            // Non-UTF-8 charsets go through the encoding-aware substr() wrapper.
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // UTF-8: call mb_substr() directly.
        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
10520
10521
    /**
     * Search a string for any of a set of characters.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found.</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // Match the first occurrence of any listed character ("u" = UTF-8 mode).
        $pattern = '/' . self::rxClass($char_list) . '/us';
        $found = \preg_match($pattern, $haystack, $matches);
        if (!$found) {
            return false;
        }

        // Locate the matched character byte-wise and return everything from there on.
        $byte_offset = (int) \strpos($haystack, $matches[0]);

        return \substr($haystack, $byte_offset);
    }
10548
10549
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, plain strpos() (for "CP850" / "ASCII"), intl (grapheme),
     * iconv, plain strpos() for pure ASCII input and finally a pure PHP fallback.
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // PHP < 8: an empty haystack never matches, so bail out before the needle is touched.
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // Only reachable on PHP >= 8, where an empty needle is reported at position 0.
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Translate it into the
            // equivalent absolute character position first, so that the position computed
            // below stays relative to the full haystack and not to the sliced-off tail.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        // Search only in the part of the haystack that starts at $offset ...
        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // ... and convert the byte position inside that part into a character
            // position, shifted by the number of skipped characters.
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
10722
10723 4
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is an empty string, it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $nothing_to_search = $haystack === '' || $needle === '';
        if ($nothing_to_search) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset;
        // otherwise the native byte-wise function is called.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
10755
10756
    /**
     * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is an empty string, it returns false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $nothing_to_search = $haystack === '' || $needle === '';
        if ($nothing_to_search) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset;
        // otherwise the native byte-wise function is called.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \stripos($haystack, $needle, $offset);
    }
10788
10789
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * INFO: the non-mbstring fallbacks only search for the first character of $needle.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        // native strrchr() cannot return the part before the needle,
        // so it is only used when the trailing part is requested
        $is_single_byte_encoding = \in_array($encoding, ['CP850', 'ASCII'], true);
        if ($is_single_byte_encoding && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both remaining fallbacks only look for the first character of the needle
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10919
10920
    /**
     * Reverses characters order in the string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // The string is passed through emoji_encode() before and emoji_decode() after
        // the reversal - presumably so that emoji survive it intact (TODO confirm).
        $str = self::emoji_encode($str, true);

        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk the string back to front, one character at a time
            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $grapheme = \grapheme_substr($str, $index, 1);
                if ($grapheme !== false) {
                    $result .= $grapheme;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10977
10978
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * INFO: the non-mbstring fallback only searches for the first character of $needle.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is searched for
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, (string) $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
11058
11059
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, plain strripos() (for "CP850" / "ASCII"), intl (grapheme),
     * plain strripos() for pure ASCII input and finally case-folding + UTF8::strrpos().
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // PHP < 8: an empty haystack never matches, so bail out before the needle is touched.
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // Only reachable on PHP >= 8, where an empty needle is reported at position 0.
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_is_usable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            && $offset >= 0 // grapheme_strripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($intl_is_usable) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both sides, then run the case-sensitive search on the folded strings
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
11190
11191
    /**
     * Finds the byte position of the last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if haystack or
     *                   needle is an empty string.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $nothing_to_search = $haystack === '' || $needle === '';
        if ($nothing_to_search) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset;
        // otherwise the native byte-wise function is called.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
11225
11226
    /**
11227
     * Find the position of the last occurrence of a substring in a string.
11228
     *
11229
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
11230 2
     *
11231
     * @see http://php.net/manual/en/function.mb-strrpos.php
11232
     *
11233
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
11234 2
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
11235
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
11236
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
11237
     *                               the end of the string.
11238
     *                               </p>
11239 2
     * @param string     $encoding   [optional] <p>Set the charset.</p>
11240
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11241
     *
11242
     * @psalm-pure
11243
     *
11244
     * @return false|int
11245
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
11246
     *                   string.<br>If needle is not found, it returns false.</p>
11247
     */
11248
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // Early exit for an empty haystack: PHP < 8 never finds anything,
        // PHP >= 8 reports position 0 for an empty needle.
        if ($haystack === '' && (!$is_php8 || $needle === '')) {
            return $is_php8 ? 0 : false;
        }

        // iconv and mbstring do not support an integer $needle,
        // so a non-negative code point is converted into its character first
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // same empty-haystack rule again, now that $needle is a string
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        // before PHP 8 an empty needle is never found
        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // 2) binary || ascii only: the byte-wise native function is used
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // 4) ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // 5) vanilla php: cut the haystack according to $offset, search byte-wise
        //    and convert the byte prefix back via self::strlen()
        //

        $cut_haystack = null;
        if ($offset > 0) {
            $cut_haystack = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $cut_haystack = self::substr($haystack, 0, $offset);
            $offset = 0;
        }

        if ($cut_haystack !== null) {
            $haystack = $cut_haystack === false ? '' : (string) $cut_haystack;
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
11385
11386
    /**
11387
     * Find the position of the last occurrence of a substring in a string.
11388
     *
11389
     * @param string $haystack <p>
11390
     *                         The string being checked, for the last occurrence
11391
     *                         of needle.
11392
     *                         </p>
11393
     * @param string $needle   <p>
11394
     *                         The string to find in haystack.
11395
     *                         </p>
11396
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
11397
     *                         the string. Negative values will stop searching at an arbitrary point
11398
     *                         prior to the end of the string.
11399
     *                         </p>
11400
     *
11401
     * @psalm-pure
11402
     *
11403
     * @return false|int
11404
     *                   <p>The numeric position of the last occurrence of needle in the
11405
     *                   haystack string. If needle is not found, it returns false.</p>
11406
     */
11407
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
11420
11421
    /**
11422
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
11423
     * mask.
11424
     *
11425 2
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // '3'</code>
11426
     *
11427
     * @param string   $str      <p>The input string.</p>
11428
     * @param string   $mask     <p>The mask of chars</p>
11429 2
     * @param int      $offset   [optional]
11430
     * @param int|null $length   [optional]
11431
     * @param string   $encoding [optional] <p>Set the charset.</p>
11432
     *
11433
     * @psalm-pure
11434 2
     *
11435
     * @return false|int
11436
     */
11437
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // restrict the input to the requested window before matching
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // match the leading run of mask characters and measure it
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11468
11469 2
    /**
11470
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
11471
     *
11472
     * EXAMPLE: <code>
11473
     * $str = 'iñtërnâtiônàlizætiøn';
11474
     * $search = 'nât';
11475
     *
11476 10
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
11477 2
     * UTF8::strstr($str, $search, true)); // 'iñtër'
11478
     * </code>
11479
     *
11480 8
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
11481
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
11482 8
     * @param bool   $before_needle [optional] <p>
11483
     *                              If <b>TRUE</b>, strstr() returns the part of the
11484
     *                              haystack before the first occurrence of the needle (excluding the needle).
11485
     *                              </p>
11486
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
11487
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
11488
     *
11489
     * @psalm-pure
11490
     *
11491
     * @return false|string
11492
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
11493
     */
11494
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // empty haystack: only PHP >= 8 "finds" an empty needle
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // empty needle: PHP >= 8 returns the whole haystack, older versions fail
        if ($needle === '') {
            return $is_php8 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only: the byte-wise native function is used
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // 4) ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5) vanilla php: lazily capture everything in front of the first needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        return $before_needle
            ? $match[1]
            : self::substr($haystack, (int) self::strlen($match[1]));
    }
11602
11603
    /**
11604
     * Finds first occurrence of a string within another.
11605
     *
11606
     * @param string $haystack      <p>
11607
     *                              The string from which to get the first occurrence
11608
     *                              of needle.
11609
     *                              </p>
11610
     * @param string $needle        <p>
11611
     *                              The string to find in haystack.
11612
     *                              </p>
11613
     * @param bool   $before_needle [optional] <p>
11614
     *                              Determines which portion of haystack
11615
     *                              this function returns.
11616
     *                              If set to true, it returns all of haystack
11617
     *                              from the beginning to the first occurrence of needle.
11618
     *                              If set to false, it returns all of haystack
11619
     *                              from the first occurrence of needle to the end,
11620
     *                              </p>
11621
     *
11622
     * @psalm-pure
11623
     *
11624
     * @return false|string
11625
     *                      <p>The portion of haystack,
11626
     *                      or false if needle is not found.</p>
11627
     */
11628
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
11644
11645
    /**
11646
     * Unicode transformation for case-less matching.
11647
     *
11648
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
11649 2
     *
11650
     * @see http://unicode.org/reports/tr21/tr21-5.html
11651
     *
11652
     * @param string      $str        <p>The input string.</p>
11653 2
     * @param bool        $full       [optional] <p>
11654
     *                                <b>true</b>, replace full case folding chars (default)<br>
11655
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
11656
     *                                </p>
11657
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11658 2
     * @param string      $encoding   [optional] <p>Set the charset.</p>
11659
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11660
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
11661
     *                                is for some languages better ...</p>
11662
     *
11663
     * @psalm-pure
11664
     *
11665
     * @return string
11666
     */
11667
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before any case conversion is applied
            $str = self::clean($str);
        }

        // apply the case-fold replacements first, then convert the case itself
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // fast path: no language special-casing and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        return $lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11701 31
11702
    /**
11703 31
     * Make a string lowercase.
11704 31
     *
11705 2
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
11706
     *
11707
     * @see http://php.net/manual/en/function.mb-strtolower.php
11708 29
     *
11709
     * @param string      $str                           <p>The string being lowercased.</p>
11710
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11711 2
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11712
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11713
     *                                                   tr</p>
11714
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11715 2
     *                                                   -> ß</p>
11716
     *
11717
     * @psalm-pure
11718
     *
11719
     * @return string
11720
     *                <p>String with all alphabetic characters converted to lowercase.</p>
11721
     */
11722
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: no language special-casing and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language specific lowercasing via an intl transliterator
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11783
11784
    /**
11785
     * Make a string uppercase.
11786
     *
11787 2
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
11788
     *
11789
     * @see http://php.net/manual/en/function.mb-strtoupper.php
11790
     *
11791
     * @param string      $str                           <p>The string being uppercased.</p>
11792
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
11793
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11794
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11795
     *                                                   tr</p>
11796
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11797 61
     *                                                   -> ß</p>
11798
     *
11799
     * @psalm-pure
11800
     *
11801
     * @return string
11802
     *                <p>String with all alphabetic characters converted to uppercase.</p>
11803
     */
11804
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: no language special-casing and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl IS available in this branch, only the requested language is unknown,
                    // so report that (same wording as the warning in strtolower())
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // the warning names this method (it previously named strtolower())
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11865
11866
    /**
11867
     * Translate characters or replace sub-strings.
11868
     *
11869 2
     * EXAMPLE:
11870
     * <code>
11871
     * $array = [
11872
     *     'Hello'   => '○●◎',
11873
     *     '中文空白' => 'earth',
11874
     * ];
11875
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
11876
     * </code>
11877
     *
11878
     * @see http://php.net/manual/en/function.strtr.php
11879 10
     *
11880
     * @param string          $str  <p>The string being translated.</p>
11881
     * @param string|string[] $from <p>The string replacing from.</p>
11882
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
11883
     *
11884
     * @psalm-pure
11885
     *
11886
     * @return string
11887
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
11888
     *                to the corresponding character in "to".</p>
11889
     */
11890
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // translating something into itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // character-wise translation: split both sides into lists of chars
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // truncate the longer list so that both sides have the same size
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the original list in $from until the pair-map is known to be valid,
            // otherwise the exception message below would only print "false"
            $pairs = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        // only reachable with $to === '': the search string is simply removed
        if (\is_string($from)) {
            return \str_replace($from, /** @scrutinizer ignore-type */ $to, $str);
        }

        // $from is a "search => replace" map here
        return \strtr($str, $from);
    }
11933
11934
    /**
11935
     * Return the width of a string.
11936 2
     *
11937
     * INFO: use UTF8::strlen() for the byte-length
11938 2
     *
11939
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21</code>
11940
     *
11941
     * @param string $str        <p>The input string.</p>
11942
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11943 2
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11944 2
     *
11945
     * @psalm-pure
11946
     *
11947 2
     * @return int
11948
     */
11949
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // 2) vanilla php: strip the chars matched by the regex below and count
        //    each of them twice, every remaining char counts once
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        $wide_chars = 0;
        $narrow_only = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_chars);

        return ($wide_chars * 2) + (int) self::strlen($narrow_only);
    }
11993
11994
    /**
11995
     * Get part of a string.
11996
     *
11997
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
11998
     *
11999
     * @see http://php.net/manual/en/function.mb-substr.php
12000
     *
12001
     * @param string   $str        <p>The string being checked.</p>
12002
     * @param int      $offset     <p>The first position used in str.</p>
12003
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
12004
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12005
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12006
     *
12007
     * @psalm-pure
12008
     *
12009
     * @return false|string
12010
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
12011
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
12012
     *                      characters long, <b>FALSE</b> will be returned.
12013
     */
12014
    public static function substr(
12015
        string $str,
12016
        int $offset = 0,
12017
        int $length = null,
12018
        string $encoding = 'UTF-8',
12019
        bool $clean_utf8 = false
12020
    ) {
12021
        // empty string
12022
        if ($str === '' || $length === 0) {
12023
            return '';
12024
        }
12025
12026
        if ($clean_utf8) {
12027
            // iconv and mbstring are not tolerant to invalid encoding
12028
            // further, their behaviour is inconsistent with that of PHP's substr
12029
            $str = self::clean($str);
12030
        }
12031
12032
        // whole string
12033
        if (!$offset && $length === null) {
12034
            return $str;
12035
        }
12036
12037
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
12038 172
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
12039 8
        }
12040
12041
        //
12042 168
        // fallback via mbstring
12043
        //
12044
12045 2
        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
12046
            if ($length === null) {
12047
                return \mb_substr($str, $offset);
12048
            }
12049 168
12050 7
            return \mb_substr($str, $offset, $length);
12051
        }
12052
12053 163
        //
12054 19
        // fallback for binary || ascii only
12055
        //
12056
12057
        if (
12058
            $encoding === 'CP850'
12059
            ||
12060
            $encoding === 'ASCII'
12061 163
        ) {
12062 161
            if ($length === null) {
12063 64
                return \substr($str, $offset);
12064
            }
12065
12066 102
            return \substr($str, $offset, $length);
12067
        }
12068
12069
        // otherwise we need the string-length
12070
        $str_length = 0;
12071
        if ($offset || $length === null) {
12072
            $str_length = self::strlen($str, $encoding);
12073
        }
12074 4
12075
        // e.g.: invalid chars + mbstring not installed
12076 4
        if ($str_length === false) {
12077
            return false;
12078
        }
12079
12080
        // empty string
12081
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
12082
            return '';
12083
        }
12084
12085
        // impossible
12086 4
        if ($offset && $offset > $str_length) {
12087 4
            return '';
12088 4
        }
12089
12090
        $length = $length ?? (int) $str_length;
12091
12092 4
        if (
12093
            $encoding !== 'UTF-8'
12094
            &&
12095
            self::$SUPPORT['mbstring'] === false
12096
        ) {
12097 4
            /**
12098
             * @psalm-suppress ImpureFunctionCall - is is only a warning
12099
             */
12100
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
12101
        }
12102 4
12103
        //
12104
        // fallback via intl
12105
        //
12106 4
12107
        if (
12108
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
12109 4
            &&
12110
            $offset >= 0 // grapheme_substr() can't handle negative offset
12111 4
            &&
12112
            self::$SUPPORT['intl'] === true
12113
        ) {
12114
            $return_tmp = \grapheme_substr($str, $offset, $length);
12115
            if ($return_tmp !== false) {
12116 2
                return $return_tmp;
12117
            }
12118
        }
12119
12120
        //
12121
        // fallback via iconv
12122
        //
12123
12124 4
        if (
12125
            $length >= 0 // "iconv_substr()" can't handle negative length
12126 4
            &&
12127
            self::$SUPPORT['iconv'] === true
12128 4
        ) {
12129
            $return_tmp = \iconv_substr($str, $offset, $length);
12130
            if ($return_tmp !== false) {
12131
                return $return_tmp;
12132
            }
12133
        }
12134
12135
        //
12136
        // fallback for ascii only
12137
        //
12138
12139
        if (ASCII::is_ascii($str)) {
12140
            return \substr($str, $offset, $length);
12141 4
        }
12142
12143 4
        //
12144
        // fallback via vanilla php
12145
        //
12146
12147
        // split to array, and remove invalid characters
12148
        $array = self::str_split($str);
12149
12150
        // extract relevant part, and join to make sting again
12151
        return \implode('', \array_slice($array, $offset, $length));
12152
    }
12153
12154
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // Only slice when the caller actually restricted the comparison window.
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // $str2 is cut down to the character count of the extracted part of $str1
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // Measure $str1 in the same encoding that is used to cut $str2,
                // otherwise the character count can be wrong for non-UTF-8 input.
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12215
12216 2
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A negative value is handed to "mb_substr()" as-is,
     *                             regardless of the offset.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if the needle is empty or the
     *                   length of the haystack could not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in / an empty search window
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Restrict the haystack to the requested window. "$length === 0" was
        // handled above, so a non-null length is either positive or negative;
        // both are applied here, also when the offset is zero.
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: count the literal (quoted) needle in unicode mode
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12313
12314
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string; 0 if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // With "mbstring.func_overload" the window is cut out manually first,
        // because the count itself is delegated to "mb_substr_count()" below.
        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                \PHP_VERSION_ID < 70100 // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        // no overload: the native function handles offset and length itself
        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
12395
12396
    /**
     * Counts how often $substring occurs in $str.
     *
     * The comparison is case-sensitive unless $case_sensitive is set to false;
     * in that case both strings are case-converted before counting.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences; 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // The fast path is chosen by the encoding as it was passed in,
        // i.e. before any normalization takes place.
        if ($encoding === 'UTF-8') {
            if (!$case_sensitive) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$case_sensitive) {
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
12443 7
12444
    /**
     * Strips $needle from the start of $haystack, ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack
     *                if it does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many characters as the needle is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12476
12477
    /**
     * Get part of a string process in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string;
     *                         null means "up to the end of the string".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. Whatever the underlying "substr()" /
     *                      "mb_substr()" call yields is returned unchanged, so older PHP
     *                      versions may return <b>FALSE</b> if <i>str</i> is shorter than
     *                      <i>offset</i>.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // Omit the length argument instead of passing a magic "big enough" number,
        // so that the result is never capped at 2147483647 bytes.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
12510 1
12511
    /**
     * Strips $needle from the end of $haystack, ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack
     *                if it does not end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the trailing needle
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
12543
12544
    /**
     * Strips $needle from the start of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack
     *                if it does not start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many characters as the needle is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12576
12577
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given for a single string, the replacing
     *                                     ends at the end of string. If it is not given for an array of strings,
     *                                     a length of zero is used for every element, i.e. the replacement is
     *                                     inserted at the given start offset (see EXAMPLE).</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, element by element; the encoding is handed down
            // explicitly so that array input is not silently treated as UTF-8
            return \array_map(
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only the first element is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing length into a positive character count
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // the replaced part must not reach beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into arrays of single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12732
12733
    /**
     * Strips $needle from the end of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack
     *                if it does not end with the needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check: do the last bytes of the haystack equal the needle?
        $has_suffix = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$has_suffix) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        // other encodings: the character lengths are measured in $encoding
        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
12782 2
12783
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);

            // The XOR trick below works byte by byte and stops at the shortest
            // operand, so it is only reliable if neither case-conversion changed
            // the byte length (e.g. "ı" (2 bytes) becomes "I" (1 byte)).
            // If a length changed, swap the case character by character instead.
            $byte_length = \strlen($str);
            if (\strlen($lower) !== $byte_length || \strlen($upper) !== $byte_length) {
                $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);

                // "preg_split()" fails on invalid UTF-8; then keep the XOR result
                if ($chars !== false) {
                    $swapped = '';
                    foreach ($chars as $char) {
                        $char_lower = \mb_strtolower($char);
                        // a char that changes when lowercased was uppercase, everything else gets uppercased
                        $swapped .= $char_lower !== $char ? $char_lower : \mb_strtoupper($char);
                    }

                    return $swapped;
                }
            }

            // lower ^ upper ^ original: wherever the original byte equals the lowercased
            // byte, both cancel out and the uppercased byte is left over (and vice versa).
            return (string) ($lower ^ $upper ^ $str);
        }

        // NOTE(review): for other encodings the XOR trick is used unconditionally and
        // therefore still assumes that case-conversion keeps the byte length.
        return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
    }
12815 6
12816 1
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function of an extension ("mb_strlen" for
     * "mbstring", "iconv" for "iconv") exists although the extension itself
     * is not loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12843
12844
    /**
     * Replaces every tab character in the string with a run of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12864
12865
    /**
12866
     * Converts the first character of each word in the string to uppercase
12867
     * and all other chars to lowercase.
12868
     *
12869
     * @param string      $str                           <p>The input string.</p>
12870 6
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12871 3
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12872 3
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12873 1
     *                                                   tr</p>
12874
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12875 2
     *                                                   -> ß</p>
12876
     *
12877
     * @psalm-pure
12878 6
     *
12879
     * @return string
12880
     *                <p>A string with all characters of $str being title-cased.</p>
12881
     */
12882
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // strip invalid byte sequences before the string is handed to the case functions
            $str = self::clean($str);
        }

        // language specific rules or length preservation are handled by "str_titleize()"
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        // plain case: let mbstring do the work
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
12919
12920 2
    /**
12921
     * alias for "UTF8::to_ascii()"
12922 2
     *
12923
     * @param string $str
12924
     * @param string $subst_chr
12925
     * @param bool   $strict
12926
     *
12927
     * @psalm-pure
12928
     *
12929
     * @return string
12930
     *
12931
     * @see        UTF8::to_ascii()
12932
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
12933
     */
12934
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        // deprecated camelCase alias: all arguments are forwarded unchanged
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12941
12942
    /**
12943
     * alias for "UTF8::to_iso8859()"
12944
     *
12945
     * @param string|string[] $str
12946
     *
12947
     * @psalm-pure
12948
     *
12949
     * @return string|string[]
12950
     *
12951
     * @see        UTF8::to_iso8859()
12952
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12953
     */
12954
    public static function toIso8859($str)
    {
        // deprecated camelCase alias of "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
12958
12959
    /**
12960
     * alias for "UTF8::to_latin1()"
12961
     *
12962
     * @param string|string[] $str
12963
     *
12964
     * @psalm-pure
12965
     *
12966
     * @return string|string[]
12967
     *
12968
     * @see        UTF8::to_iso8859()
12969
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12970
     */
12971
    public static function toLatin1($str)
    {
        // deprecated alias: Latin-1 conversion is implemented by "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
12975
12976
    /**
12977
     * alias for "UTF8::to_utf8()"
12978
     *
12979
     * @param string|string[] $str
12980
     *
12981
     * @psalm-pure
12982
     *
12983
     * @return string|string[]
12984
     *
12985
     * @see        UTF8::to_utf8()
12986
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
12987
     */
12988
    public static function toUTF8($str)
    {
        // deprecated camelCase alias of "to_utf8()"
        $converted = self::to_utf8($str);

        return $converted;
    }
12992
12993
    /**
12994
     * Convert a string into ASCII.
12995
     *
12996
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
12997
     *
12998
     * @param string $str     <p>The input string.</p>
12999
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
13000
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
13001
     *                        performance</p>
13002
     *
13003
     * @psalm-pure
13004
     *
13005
     * @return string
13006 2
     */
13007
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // thin facade: the transliteration itself is done by the ASCII class
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
13014
13015
    /**
13016
     * @param bool|int|float|string $str
13017
     *
13018
     * @psalm-pure
13019
     *
13020
     * @return bool
13021
     */
13022
    public static function to_boolean($str): bool
    {
        // Normalize once: surrounding whitespace carries no meaning for a boolean
        // value. Trimming up front (instead of only in the final fallback) makes
        // padded keywords such as " false " or "no\n" hit the map below and gives
        // numeric strings with trailing whitespace the same result on every PHP version.
        $value = \trim((string) $str);

        // empty or whitespace-only input is false
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // the keywords are matched case-insensitively
        $key = \strtolower($value);
        if (isset($map[$key])) {
            return $map[$key];
        }

        // any other numeric string is true only if it is greater than zero
        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        // every remaining non-empty string is true
        return true;
    }
13058
13059
    /**
13060
     * Convert given string to safe filename (and keep string case).
13061 17
     *
13062 11
     * @param string $str
13063
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
13064
     *                                  simply replaced with hyphen.
13065 6
     * @param string $fallback_char
13066 6
     *
13067 2
     * @psalm-pure
13068
     *
13069
     * @return string
13070 4
     */
13071 2
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // thin facade: the filename conversion is implemented by the ASCII class
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
13082
13083
    /**
13084
     * Convert a string into "ISO-8859"-encoding (Latin-1).
13085
     *
13086
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
13087
     *
13088
     * @param string|string[] $str
13089
     *
13090
     * @psalm-pure
13091
     *
13092
     * @return string|string[]
13093
     */
13094 1
    public static function to_iso8859($str)
    {
        // scalar input: cast, short-circuit on the empty string, otherwise decode
        if (!\is_array($str)) {
            $input = (string) $str;

            return $input === '' ? '' : self::utf8_decode($input);
        }

        // array input: convert every element (recursively), keys and order are kept
        $converted = [];
        foreach ($str as $key => $value) {
            $converted[$key] = self::to_iso8859($value);
        }

        return $converted;
    }
13111
13112
    /**
13113
     * alias for "UTF8::to_iso8859()"
13114 8
     *
13115 2
     * @param string|string[] $str
13116 2
     *
13117
     * @psalm-pure
13118
     *
13119 2
     * @return string|string[]
13120
     *
13121
     * @see        UTF8::to_iso8859()
13122 8
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
13123 8
     */
13124 2
    public static function to_latin1($str)
    {
        // deprecated alias of "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
13128
13129
    /**
13130
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13131
     *
13132
     * <ul>
13133
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
13134
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13135
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
13136
     * case.</li>
13137
     * </ul>
13138
     *
13139
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
13140
     *
13141
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
13142
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13143
     *
13144 2
     * @psalm-pure
13145
     *
13146
     * @return string|string[]
13147
     *                         <p>The UTF-8 encoded string</p>
13148
     *
13149
     * @template TToUtf8
13150
     * @phpstan-param TToUtf8 $str
13151
     * @phpstan-return TToUtf8
13152
     *
13153
     * @noinspection SuspiciousBinaryOperationInspection
13154
     */
13155
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        // single string: convert it directly
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $result */
            $result = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $result;
        }

        // array of strings: convert every element, keys and order are kept
        $converted = [];
        foreach ($str as $key => $value) {
            $converted[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $converted */
        return $converted;
    }
13170
13171
    /**
13172
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13173
     *
13174
     * <ul>
13175 44
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
13176 4
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13177 4
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
13178
     * case.</li>
13179
     * </ul>
13180 4
     *
13181
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
13182
     *
13183
     * @param string $str                        <p>Any string.</p>
13184 44
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13185
     *
13186 44
     * @psalm-pure
13187
     *
13188
     * @return string
13189
     *                <p>The UTF-8 encoded string</p>
13190
     *
13191
     * @noinspection SuspiciousBinaryOperationInspection
13192
     */
13193
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';

        // Step 1: walk the input byte by byte. Byte sequences that already look like
        // UTF-8 are copied as they are, every other byte >= 0x80 is passed to
        // "to_utf8_convert_helper()".
        for ($pos = 0; $pos < $length; ++$pos) {
            $lead = $str[$pos];

            if ($lead < "\x80") {
                // 7-bit byte: it doesn't need conversion
                $result .= $lead;

                continue;
            }

            // number of bytes the sequence should have according to its first byte;
            // 0 means the byte cannot start a sequence (0x80-0xBF or 0xF8-0xFF)
            if ($lead >= "\xC0" && $lead <= "\xDF") {
                $expected = 2;
            } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
                $expected = 3;
            } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
                $expected = 4;
            } else {
                $expected = 0;
            }

            // the whole sequence must fit into the input and every following byte
            // must be a continuation byte (0x80-0xBF)
            $sequence = $lead;
            $looks_like_utf8 = $expected !== 0 && $pos + $expected <= $length;
            for ($offset = 1; $looks_like_utf8 && $offset < $expected; ++$offset) {
                $next = $str[$pos + $offset];
                $looks_like_utf8 = $next >= "\x80" && $next <= "\xBF";
                $sequence .= $next;
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: keep it and skip the consumed bytes
                $result .= $sequence;
                $pos += $expected - 1;
            } else {
                // not valid UTF8: convert only the first byte, the next bytes are re-examined
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // Step 2: decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $code_point = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6))
                           . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($result === null) {
            return '';
        }

        // Step 3 (optional): decode html-entities
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
13304
13305
    /**
13306 10
     * Returns the given string as an integer, or null if the string isn't numeric.
13307 44
     *
13308 44
     * @param string $str
13309
     *
13310
     * @psalm-pure
13311 44
     *
13312
     * @return int|null
13313
     *                  <p>null if the string isn't numeric</p>
13314
     */
13315
    public static function to_int(string $str)
    {
        // numeric strings are cast to int, everything else yields null
        return \is_numeric($str) ? (int) $str : null;
    }
13323
13324
    /**
13325
     * Returns the given input as string, or null if the input isn't int|float|string
13326
     * and do not implement the "__toString()" method.
13327
     *
13328
     * @param float|int|object|string|null $input
13329
     *
13330
     * @psalm-pure
13331
     *
13332
     * @return string|null
13333
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
13334
     */
13335 1
    public static function to_string($input)
    {
        switch (\gettype($input)) {
            // scalars that have a natural string representation
            case 'string':
            case 'integer':
            case 'float':
            case 'double':
                return (string) $input;

            // objects are only converted if they implement "__toString()"
            case 'object':
                return \method_exists($input, '__toString') ? (string) $input : null;

            // null, bool, array, resource, ...
            default:
                return null;
        }
    }
13369 1
13370
    /**
13371 1
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
13372
     *
13373
     * INFO: This is slower than "trim()"
13374 1
     *
13375
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
13376
     * but the check for ASCII (7-Bit) costs more time than we can save here.
13377 1
     *
13378
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
13379
     *
13380 1
     * @param string      $str   <p>The string to be trimmed</p>
13381 1
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
13382
     *
13383
     * @psalm-pure
13384
     *
13385 1
     * @return string
13386
     *                <p>The trimmed string.</p>
13387
     */
13388
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list means "nothing to strip" (same as native "trim($str, '')").
        // Without this guard the pattern below would become the malformed "^[]+|[]+$".
        if ($chars === '') {
            return $str;
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // "mb_ereg_replace()" takes the pattern without delimiters, the
            // "regex_replace()" fallback is given '/' as the delimiter to escape
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');

            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$quoted_chars}]+|[{$quoted_chars}]+\$";
        } else {
            // default: strip whitespace
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
13416 28
13417
    /**
13418 22
     * Makes string's first char uppercase.
13419
     *
13420
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
13421
     *
13422 50
     * @param string      $str                           <p>The input string.</p>
13423
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
13424
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
13425 8
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
13426
     *                                                   tr</p>
13427
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
13428
     *                                                   -> ß</p>
13429 8
     *
13430
     * @psalm-pure
13431
     *
13432 8
     * @return string
13433
     *                <p>The resulting string with its first char in uppercase.</p>
13434
     */
13435
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the string is split
            $str = self::clean($str);
        }

        // language specific rules or length preservation need "self::strtoupper()",
        // otherwise plain "mb_strtoupper()" is used
        $needs_special_casing = $lang !== null || $try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);

            if ($needs_special_casing) {
                $head = self::strtoupper(
                    $head,
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            } else {
                $head = \mb_strtoupper($head);
            }

            return $head . $tail;
        }

        // any other encoding: normalize its name first
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($needs_special_casing) {
            $head = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        } else {
            $head = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        }

        return $head . $tail;
    }
13493 47
13494 47
    /**
13495 47
     * alias for "UTF8::ucfirst()"
13496 47
     *
13497
     * @param string $str
13498
     * @param string $encoding
13499
     * @param bool   $clean_utf8
13500
     *
13501
     * @psalm-pure
13502
     *
13503
     * @return string
13504
     *
13505
     * @see        UTF8::ucfirst()
13506
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
13507
     */
13508
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // deprecated alias of "ucfirst()": only the first char of the string is uppercased
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
13515
13516
    /**
13517
     * Uppercase for all words in the string.
13518
     *
13519
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
13520
     *
13521
     * @param string   $str        <p>The input string.</p>
13522
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
13523
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
13524
     *                             word.</p>
13525
     * @param string   $encoding   [optional] <p>Set the charset.</p>
13526
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
13527
     *
13528
     * @psalm-pure
13529
     *
13530
     * @return string
13531 1
     */
13532
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: the string "0" is falsy in PHP but it is valid input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // strip invalid byte sequences before the string is split into words
            $str = self::clean($str);
        }

        // The native "ucwords()" is only an option if neither extra word-chars nor
        // (non-empty) exceptions were given. Compared against '' on purpose, so that
        // a value of "0" is not mistaken for "nothing given".
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // iterate by value: the elements are only read, never modified
        foreach ($words as $word) {
            // skip empty chunks only, a word like "0" must be kept
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                // excluded words are copied unchanged
                $words_str .= $word;
            }
        }

        return $words_str;
    }
13584 7
13585 7
    /**
13586 7
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
13587
     *
13588
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
13589
     *
13590 7
     * e.g:
13591
     * 'test+test'                     => 'test test'
13592 7
     * 'D&#252;sseldorf'               => 'Düsseldorf'
13593
     * 'D%FCsseldorf'                  => 'Düsseldorf'
13594 7
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
13595
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
13596 7
     * 'Düsseldorf'                   => 'Düsseldorf'
13597
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
13598
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
13599
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
13600 7
     *
13601
     * @param string $str          <p>The input string.</p>
13602
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
13603
     *
13604
     * @psalm-pure
13605
     *
13606
     * @return string
13607
     */
13608
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // One pass = "to_utf8()" -> "html_entity_decode()" -> "urldecode()".
        // With $multi_decode the pass is repeated until the string stops changing,
        // otherwise it runs exactly once.
        do {
            $before = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($before),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $decoded !== $before);

        return self::fix_simple_utf8($decoded);
    }
13644 4
13645
    /**
13646 4
     * Return an array with "urlencoded"-win1252 -> UTF-8
13647
     *
13648 3
     * @psalm-pure
13649
     *
13650
     * @return string[]
13651
     *
13652
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
13653 3
     */
13654 3
    public static function urldecode_fix_win1252_chars(): array
13655 3
    {
13656 3
        return [
13657 3
            '%20' => ' ',
13658
            '%21' => '!',
13659
            '%22' => '"',
13660
            '%23' => '#',
13661 3
            '%24' => '$',
13662
            '%25' => '%',
13663
            '%26' => '&',
13664
            '%27' => "'",
13665
            '%28' => '(',
13666 1
            '%29' => ')',
13667 1
            '%2A' => '*',
13668 1
            '%2B' => '+',
13669 1
            '%2C' => ',',
13670 1
            '%2D' => '-',
13671
            '%2E' => '.',
13672
            '%2F' => '/',
13673
            '%30' => '0',
13674
            '%31' => '1',
13675
            '%32' => '2',
13676 4
            '%33' => '3',
13677
            '%34' => '4',
13678
            '%35' => '5',
13679
            '%36' => '6',
13680
            '%37' => '7',
13681
            '%38' => '8',
13682
            '%39' => '9',
13683
            '%3A' => ':',
13684
            '%3B' => ';',
13685
            '%3C' => '<',
13686
            '%3D' => '=',
13687
            '%3E' => '>',
13688
            '%3F' => '?',
13689
            '%40' => '@',
13690
            '%41' => 'A',
13691 2
            '%42' => 'B',
13692
            '%43' => 'C',
13693
            '%44' => 'D',
13694
            '%45' => 'E',
13695
            '%46' => 'F',
13696
            '%47' => 'G',
13697
            '%48' => 'H',
13698
            '%49' => 'I',
13699
            '%4A' => 'J',
13700
            '%4B' => 'K',
13701
            '%4C' => 'L',
13702
            '%4D' => 'M',
13703
            '%4E' => 'N',
13704
            '%4F' => 'O',
13705
            '%50' => 'P',
13706
            '%51' => 'Q',
13707
            '%52' => 'R',
13708
            '%53' => 'S',
13709
            '%54' => 'T',
13710
            '%55' => 'U',
13711
            '%56' => 'V',
13712
            '%57' => 'W',
13713
            '%58' => 'X',
13714
            '%59' => 'Y',
13715
            '%5A' => 'Z',
13716
            '%5B' => '[',
13717
            '%5C' => '\\',
13718
            '%5D' => ']',
13719
            '%5E' => '^',
13720
            '%5F' => '_',
13721
            '%60' => '`',
13722
            '%61' => 'a',
13723
            '%62' => 'b',
13724
            '%63' => 'c',
13725
            '%64' => 'd',
13726
            '%65' => 'e',
13727
            '%66' => 'f',
13728
            '%67' => 'g',
13729
            '%68' => 'h',
13730
            '%69' => 'i',
13731
            '%6A' => 'j',
13732
            '%6B' => 'k',
13733
            '%6C' => 'l',
13734
            '%6D' => 'm',
13735
            '%6E' => 'n',
13736
            '%6F' => 'o',
13737
            '%70' => 'p',
13738
            '%71' => 'q',
13739
            '%72' => 'r',
13740
            '%73' => 's',
13741
            '%74' => 't',
13742
            '%75' => 'u',
13743
            '%76' => 'v',
13744
            '%77' => 'w',
13745
            '%78' => 'x',
13746
            '%79' => 'y',
13747
            '%7A' => 'z',
13748
            '%7B' => '{',
13749
            '%7C' => '|',
13750
            '%7D' => '}',
13751
            '%7E' => '~',
13752
            '%7F' => '',
13753
            '%80' => '`',
13754
            '%81' => '',
13755
            '%82' => '‚',
13756
            '%83' => 'ƒ',
13757
            '%84' => '„',
13758
            '%85' => '…',
13759
            '%86' => '†',
13760
            '%87' => '‡',
13761
            '%88' => 'ˆ',
13762
            '%89' => '‰',
13763
            '%8A' => 'Š',
13764
            '%8B' => '‹',
13765
            '%8C' => 'Œ',
13766
            '%8D' => '',
13767
            '%8E' => 'Ž',
13768
            '%8F' => '',
13769
            '%90' => '',
13770
            '%91' => '‘',
13771
            '%92' => '’',
13772
            '%93' => '“',
13773
            '%94' => '”',
13774
            '%95' => '•',
13775
            '%96' => '–',
13776
            '%97' => '—',
13777
            '%98' => '˜',
13778
            '%99' => '™',
13779
            '%9A' => 'š',
13780
            '%9B' => '›',
13781
            '%9C' => 'œ',
13782
            '%9D' => '',
13783
            '%9E' => 'ž',
13784
            '%9F' => 'Ÿ',
13785
            '%A0' => '',
13786
            '%A1' => '¡',
13787
            '%A2' => '¢',
13788
            '%A3' => '£',
13789
            '%A4' => '¤',
13790
            '%A5' => '¥',
13791
            '%A6' => '¦',
13792
            '%A7' => '§',
13793
            '%A8' => '¨',
13794
            '%A9' => '©',
13795
            '%AA' => 'ª',
13796
            '%AB' => '«',
13797
            '%AC' => '¬',
13798
            '%AD' => '',
13799
            '%AE' => '®',
13800
            '%AF' => '¯',
13801
            '%B0' => '°',
13802
            '%B1' => '±',
13803
            '%B2' => '²',
13804
            '%B3' => '³',
13805
            '%B4' => '´',
13806
            '%B5' => 'µ',
13807
            '%B6' => '¶',
13808
            '%B7' => '·',
13809
            '%B8' => '¸',
13810
            '%B9' => '¹',
13811
            '%BA' => 'º',
13812
            '%BB' => '»',
13813
            '%BC' => '¼',
13814
            '%BD' => '½',
13815
            '%BE' => '¾',
13816
            '%BF' => '¿',
13817
            '%C0' => 'À',
13818
            '%C1' => 'Á',
13819
            '%C2' => 'Â',
13820
            '%C3' => 'Ã',
13821
            '%C4' => 'Ä',
13822
            '%C5' => 'Å',
13823
            '%C6' => 'Æ',
13824
            '%C7' => 'Ç',
13825
            '%C8' => 'È',
13826
            '%C9' => 'É',
13827
            '%CA' => 'Ê',
13828
            '%CB' => 'Ë',
13829
            '%CC' => 'Ì',
13830
            '%CD' => 'Í',
13831
            '%CE' => 'Î',
13832
            '%CF' => 'Ï',
13833
            '%D0' => 'Ð',
13834
            '%D1' => 'Ñ',
13835
            '%D2' => 'Ò',
13836
            '%D3' => 'Ó',
13837
            '%D4' => 'Ô',
13838
            '%D5' => 'Õ',
13839
            '%D6' => 'Ö',
13840
            '%D7' => '×',
13841
            '%D8' => 'Ø',
13842
            '%D9' => 'Ù',
13843
            '%DA' => 'Ú',
13844
            '%DB' => 'Û',
13845
            '%DC' => 'Ü',
13846
            '%DD' => 'Ý',
13847
            '%DE' => 'Þ',
13848
            '%DF' => 'ß',
13849
            '%E0' => 'à',
13850
            '%E1' => 'á',
13851
            '%E2' => 'â',
13852
            '%E3' => 'ã',
13853
            '%E4' => 'ä',
13854
            '%E5' => 'å',
13855
            '%E6' => 'æ',
13856
            '%E7' => 'ç',
13857
            '%E8' => 'è',
13858
            '%E9' => 'é',
13859
            '%EA' => 'ê',
13860
            '%EB' => 'ë',
13861
            '%EC' => 'ì',
13862
            '%ED' => 'í',
13863
            '%EE' => 'î',
13864
            '%EF' => 'ï',
13865
            '%F0' => 'ð',
13866
            '%F1' => 'ñ',
13867
            '%F2' => 'ò',
13868
            '%F3' => 'ó',
13869
            '%F4' => 'ô',
13870
            '%F5' => 'õ',
13871
            '%F6' => 'ö',
13872
            '%F7' => '÷',
13873
            '%F8' => 'ø',
13874
            '%F9' => 'ù',
13875
            '%FA' => 'ú',
13876
            '%FB' => 'û',
13877
            '%FC' => 'ü',
13878
            '%FD' => 'ý',
13879
            '%FE' => 'þ',
13880
            '%FF' => 'ÿ',
13881
        ];
13882
    }
13883
13884
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Code points above U+00FF (3- and 4-byte sequences, and 2-byte sequences
     * that decode to 256 or more) are replaced by "?". A 2-byte lead byte that
     * is the last byte of the input (truncated sequence) is replaced by "?" too.
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true, the untouched input is returned whenever the decoded
     *                                result is not shorter than the input, both measured with
     *                                self::strlen().
     *                                </p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        // $i is the read offset, $j the write offset: the string is rewritten in place
        // and $j never overtakes $i, so unread bytes are never clobbered.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // string bitwise AND: keep only the high nibble of the current byte
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence: there is no continuation byte to read
                        $str[$j] = $no_char_found;

                        break;
                    }

                    // 2-byte sequence: 5 payload bits from the lead byte, 6 from the continuation byte
                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: not representable in ISO-8859-1
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // every other byte is copied through unchanged
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13960
13961
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Uses mb_convert_encoding() when it is available, because the native
     * utf8_encode() is deprecated as of PHP 8.2; falls back to utf8_encode() otherwise.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        if (\function_exists('mb_convert_encoding')) {
            /** @var false|string $encoded - the polyfill maybe return false */
            $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        } else {
            /** @var false|string $encoded - the polyfill maybe return false */
            $encoded = \utf8_encode($str);
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
13989 2
13990
    /**
     * Deprecated alias: forwards the string unchanged to self::fix_simple_utf8()
     * and returns its result (fix -> utf8-win1252 chars).
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
14005
14006
    /**
     * Returns the table of all known utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>
     *                  The whitespace characters as values, keyed by the type of whitespace
     *                  as defined in the URL above.
     *                  </p>
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
14021 16
14022
    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The maximum number of words; values below 1 yield an empty string.</p>
     * @param string $str_add_on <p>Suffix appended when the string was actually shortened.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input itself if it has no more than $limit words, otherwise the shortened string.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, followed by up to $limit runs of "non-whitespace + trailing whitespace"
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // nothing matched: hand the input back untouched
        if (!isset($found[0])) {
            return $str;
        }

        $head = $found[0];

        // the match already covers the whole string: nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
14056
14057
    /**
     * Wraps a string to a given number of characters.
     *
     * The string is translated into a one-byte-per-character mask ("?" for a character,
     * " " for a space, "#" for an existing break), the mask is wrapped with the native
     * wordwrap(), and the original characters are then re-emitted along the wrapped mask.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column (empty if $str or $break is empty).</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            // every segment but the first was preceded by a break in the input
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;   // position in $chars
        $mask_index = -1;  // position in $mask
        $marker = -1;      // position of the current "#" in $mask
        while (true) {
            $marker = \mb_strpos($mask, '#', $marker + 1);
            if ($marker === false) {
                break;
            }

            // copy the characters that sit in front of the current break marker
            for (++$mask_index; $mask_index < $marker; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }

                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the marker replaced an existing break or a space: drop that element
            if (
                $break === $chars[$char_index]
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index]);
                ++$char_index;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($marker > $mask_length) {
                break;
            }
        }

        // whatever was not consumed forms the last line
        return $wrapped . \implode('', $chars);
    }
14151
14152 10
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * Each piece is wrapped on its own with self::wordwrap(); the pieces are then joined
     * again with $delimiter (or "\n" when no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // no delimiter: split on any newline flavour (\r\n, \r or \n)
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }
14205
14206
    /**
     * Returns the list of Unicode White Space characters held in self::$WHITESPACE.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
14218 1
14219 1
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary data that is detected as UTF-16 or UTF-32 is rejected
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // With the /u modifier PCRE validates the whole subject first, so matching
            // just the first character succeeds only if the string is valid UTF-8.
            return \preg_match('/^./us', $str) === 1;
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // manual fallback: a small state machine over the raw bytes
        $pending = 0;         // continuation octets still expected for the current sequence
        $code_point = 0;      // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($offset = 0; $offset < $byte_count; ++$offset) {
            $octet = self::$ORD[$str[$offset]];

            if ($pending !== 0) {
                // inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal
                if (($octet & 0xC0) !== 0x80) {
                    // incomplete multi-octet sequence
                    return false;
                }

                --$pending;
                $code_point |= ($octet & 0x0000003F) << ($pending * 6);

                if ($pending !== 0) {
                    continue;
                }

                // sequence complete: check for illegal sequences and code points
                if (
                    // from Unicode 3.1, non-shortest form is illegal
                    ($sequence_length === 2 && $code_point < 0x0080)
                    ||
                    ($sequence_length === 3 && $code_point < 0x0800)
                    ||
                    ($sequence_length === 4 && $code_point < 0x10000)
                    ||
                    // 5 and 6 octet sequences are always illegal
                    ($sequence_length > 4)
                    ||
                    // from Unicode 3.2, surrogate characters are illegal
                    (($code_point & 0xFFFFF800) === 0xD800)
                    ||
                    // code points outside the Unicode range are illegal
                    ($code_point > 0x10FFFF)
                ) {
                    return false;
                }

                // reset for the next character
                $code_point = 0;
                $sequence_length = 1;

                continue;
            }

            // at a character boundary: expect US-ASCII or the first octet of a sequence
            if (($octet & 0x80) === 0) {
                // US-ASCII, pass straight through
                $sequence_length = 1;
            } elseif (($octet & 0xE0) === 0xC0) {
                // first octet of a 2 octet sequence
                $code_point = ($octet & 0x1F) << 6;
                $pending = 1;
                $sequence_length = 2;
            } elseif (($octet & 0xF0) === 0xE0) {
                // first octet of a 3 octet sequence
                $code_point = ($octet & 0x0F) << 12;
                $pending = 2;
                $sequence_length = 3;
            } elseif (($octet & 0xF8) === 0xF0) {
                // first octet of a 4 octet sequence
                $code_point = ($octet & 0x07) << 18;
                $pending = 3;
                $sequence_length = 4;
            } elseif (($octet & 0xFC) === 0xF8) {
                // First octet of a 5 octet sequence: always illegal, but instead of
                // resynchronizing we read on and let the end-of-sequence check reject it.
                $code_point = ($octet & 0x03) << 24;
                $pending = 4;
                $sequence_length = 5;
            } elseif (($octet & 0xFE) === 0xFC) {
                // first octet of a 6 octet sequence, handled like the 5 octet case
                $code_point = ($octet & 1) << 30;
                $pending = 5;
                $sequence_length = 6;
            } else {
                // neither US-ASCII nor a legal first octet of a multi-octet sequence
                return false;
            }
        }

        // a sequence that is still open at the end of the string is invalid
        return $pending === 0;
    }
14374
14375
    /**
     * Replaces the characters of the common case-fold table (and optionally of the
     * full case-fold data set) by their counterparts in the other case.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // pick the replacement direction once, then run a single replace
        if ($use_lowercase) {
            $search = $upper;
            $replace = $lower;
        } else {
            $search = $lower;
            $replace = $upper;
        }

        $str = \str_replace($search, $replace, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            // loaded once per request and kept in the static variable
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 when lower-casing, and the other way round otherwise
        $from = $use_lowercase ? 0 : 1;
        $to = $use_lowercase ? 1 : 0;

        return \str_replace($FULL_CASE_FOLD[$from], $FULL_CASE_FOLD[$to], $str);
    }
14428 33
14429 2
    /**
     * get data from "/data/*.php"
     *
     * Includes "<this directory>/data/<$file>.php" and hands back whatever that file returns.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
14447
14448 31
    /**
     * Lazily builds the emoji lookup caches (keys, values and reversible placeholders).
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        // already initialized by an earlier call
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * Longest keys first.
         *
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder per key, built from the key's crc32 checksum and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14483
14484
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              true if the MB_OVERLOAD_STRING constant is defined and its bit is set
     *              in the "mbstring.func_overload" INI value, false otherwise.
     *              </p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing to test against
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
14505 1
14506
    /**
     * Filters a list of strings and returns the remaining values as a re-indexed list.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop values that are empty after trim().</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop values whose mb_strlen() is less than or equal to
     *                                      this number; null disables the length filter.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: the elements are only read, so no reference is needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
14547
14548
    /**
     * rxClass
     *
     * Builds a regex fragment that matches any of the characters of $s: a bracket
     * expression "[...]" or, if some characters are kept as separate alternatives,
     * a non-capturing group "(?:[...]|x|y)". Results are cached per ($s, $class) pair.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the bracket expression.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 collects the bracket expression, further entries are standalone alternatives
        /** @var string[] $class_array */
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" goes to the front of the bracket expression
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: quote it for use inside a "/"-delimited pattern
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14606 21
14607
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every part
     * via self::ucfirst(), except for parts that
     * - are exactly one of the listed name particles (e.g. "von", "de", "ibn"),
     * - start with one of the listed prefixes (e.g. "mc", "d'", "al-"), or
     * - start with one of the listed name particles when the delimiter is "-".
     * The parts are joined again with the same delimiter.
     *
     * @param string $names     <p>The names to process.</p>
     * @param string $delimiter <p>The separator between the name parts; an empty delimiter yields an empty
     *                          string.</p>
     * @param string $encoding  <p>Not used by this helper; kept for interface compatibility.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() rejects an empty delimiter: it returns false (with a warning) on PHP 7
        // and throws a ValueError on PHP 8, so handle that case up front on every version.
        if ($delimiter === '') {
            return '';
        }

        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched; for the "-" delimiter the name
        // particles count as beginnings as well.
        $protected_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $protected_beginnings = \array_merge($special_cases['names'], $protected_beginnings);
        }

        // Key-based iteration instead of by-reference, so no dangling reference is left behind.
        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($protected_beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    continue 2;
                }
            }

            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
14707 1
14708
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The input is normalized to NFD and every run of non-spacing marks (\p{Mn})
     * is then removed from the result.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The folded string, an empty string if the normalization failed,
     *                     or whatever preg_replace() reports (null) if the replacement failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
        /** @phpstan-ignore-next-line - https://github.com/JetBrains/phpstorm-stubs/pull/949 */
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
14732 1
14733 1
    /**
     * Converts one input element into a UTF-8 byte sequence.
     *
     * The element is looked up in the "ord" table; if the resulting value has an entry
     * in the "win1252_to_utf8" table, that entry is returned. Otherwise two bytes are
     * built: a lead byte ("chr" table entry for value / 64, OR-ed with 0xC0) and a
     * trailing byte (the input AND-ed with 0x3F, OR-ed with 0x80).
     *
     * @param int|string $input
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // Load the lookup tables on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            return '' . self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Truncate explicitly: a fractional float used as an array key triggers an
        // "implicit conversion from float to int" deprecation on PHP >= 8.1.
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";

        // The bitwise operators are intentional: they work byte-wise on the strings.
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
14772
14773
    /**
     * Rewrites "%uXXXX" escapes (three or four hex digits) into numeric HTML
     * entities of the form "&#xXXXX;". Strings without such an escape are
     * returned unchanged.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // Cheap substring test first, then confirm that a well-formed escape is present.
        $has_escape = \strpos($str, '%u') !== false
                      && \preg_match($unicode_escape_rx, $str);

        if ($has_escape) {
            return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
        }

        return $str;
    }
14795
}
14796